diff --git a/.github/workflows/metadata-io.yml b/.github/workflows/metadata-io.yml index e5e5f1dae5447d..7018b42949e892 100644 --- a/.github/workflows/metadata-io.yml +++ b/.github/workflows/metadata-io.yml @@ -8,6 +8,7 @@ on: - "li-utils/**" - "metadata-models/**" - "metadata-io/**" + - ".github/workflows/metadata-io.yml" pull_request: branches: - "**" @@ -16,6 +17,7 @@ on: - "li-utils/**" - "metadata-models/**" - "metadata-io/**" + - ".github/workflows/metadata-io.yml" release: types: [published] @@ -52,6 +54,8 @@ jobs: sudo apt-get remove 'dotnet-*' azure-cli || true sudo rm -rf /usr/local/lib/android/ || true sudo docker image prune -a -f || true + - name: Disk Check + run: df -h . && docker images - uses: acryldata/sane-checkout-action@v3 - name: Set up JDK 17 uses: actions/setup-java@v4 diff --git a/build.gradle b/build.gradle index 302b37281798fc..d7fbbb6682e041 100644 --- a/build.gradle +++ b/build.gradle @@ -117,9 +117,9 @@ project.ext.externalDependency = [ 'awsRds':'software.amazon.awssdk:rds:2.18.24', 'cacheApi': 'javax.cache:cache-api:1.1.0', 'commonsCli': 'commons-cli:commons-cli:1.5.0', - 'commonsIo': 'commons-io:commons-io:2.4', + 'commonsIo': 'commons-io:commons-io:2.17.0', 'commonsLang': 'commons-lang:commons-lang:2.6', - 'commonsText': 'org.apache.commons:commons-text:1.10.0', + 'commonsText': 'org.apache.commons:commons-text:1.12.0', 'commonsCollections': 'commons-collections:commons-collections:3.2.2', 'caffeine': 'com.github.ben-manes.caffeine:caffeine:3.1.8', 'datastaxOssNativeProtocol': 'com.datastax.oss:native-protocol:1.5.1', @@ -270,12 +270,12 @@ project.ext.externalDependency = [ 'zookeeper': 'org.apache.zookeeper:zookeeper:3.8.4', 'wire': 'com.squareup.wire:wire-compiler:3.7.1', 'charle': 'com.charleskorn.kaml:kaml:0.53.0', - 'common': 'commons-io:commons-io:2.7', 'jline':'jline:jline:1.4.1', 'jetbrains':' org.jetbrains.kotlin:kotlin-stdlib:1.6.0', 'annotationApi': 'javax.annotation:javax.annotation-api:1.3.2', 'jakartaAnnotationApi': 
'jakarta.annotation:jakarta.annotation-api:3.0.0', 'classGraph': 'io.github.classgraph:classgraph:4.8.172', + 'mustache': 'com.github.spullara.mustache.java:compiler:0.9.14' ] allprojects { @@ -391,12 +391,13 @@ subprojects { implementation externalDependency.annotationApi constraints { implementation("com.google.googlejavaformat:google-java-format:$googleJavaFormatVersion") - implementation('io.netty:netty-all:4.1.100.Final') - implementation('org.apache.commons:commons-compress:1.26.0') - implementation('org.apache.velocity:velocity-engine-core:2.3') + implementation('io.netty:netty-all:4.1.114.Final') + implementation('org.apache.commons:commons-compress:1.27.1') + implementation('org.apache.velocity:velocity-engine-core:2.4') implementation('org.hibernate:hibernate-validator:6.0.20.Final') implementation("com.fasterxml.jackson.core:jackson-databind:$jacksonVersion") implementation("com.fasterxml.jackson.core:jackson-dataformat-cbor:$jacksonVersion") + implementation(externalDependency.commonsIo) } } diff --git a/buildSrc/build.gradle b/buildSrc/build.gradle index 0101d1b717205a..9c0e44c41fb0fd 100644 --- a/buildSrc/build.gradle +++ b/buildSrc/build.gradle @@ -22,7 +22,7 @@ dependencies { implementation 'com.google.guava:guava:32.1.2-jre' implementation 'com.fasterxml.jackson.core:jackson-databind:2.13.5' implementation 'com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.13.5' - implementation 'commons-io:commons-io:2.11.0' + implementation 'commons-io:commons-io:2.17.0' compileOnly 'org.projectlombok:lombok:1.18.30' annotationProcessor 'org.projectlombok:lombok:1.18.30' diff --git a/datahub-frontend/play.gradle b/datahub-frontend/play.gradle index b14962e5900cd2..ff43e4a93a80f8 100644 --- a/datahub-frontend/play.gradle +++ b/datahub-frontend/play.gradle @@ -20,7 +20,7 @@ dependencies { play('com.nimbusds:nimbus-jose-jwt:8.18') play('com.typesafe.akka:akka-actor_2.12:2.6.20') play(externalDependency.jsonSmart) - play('io.netty:netty-all:4.1.86.Final') + 
play('io.netty:netty-all:4.1.114.Final') implementation(externalDependency.commonsText) { because("previous versions are vulnerable to CVE-2022-42889") } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityLineageResultResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityLineageResultResolver.java index d872ffad2783db..204e591b1da3ee 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityLineageResultResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityLineageResultResolver.java @@ -25,6 +25,7 @@ import graphql.schema.DataFetchingEnvironment; import io.datahubproject.metadata.services.RestrictedService; import java.util.HashSet; +import java.util.Optional; import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; @@ -60,14 +61,16 @@ public CompletableFuture get(DataFetchingEnvironment enviro final LineageInput input = bindArgument(environment.getArgument("input"), LineageInput.class); final LineageDirection lineageDirection = input.getDirection(); - @Nullable final Integer start = input.getStart(); // Optional! - @Nullable final Integer count = input.getCount(); // Optional! - @Nullable final Boolean separateSiblings = input.getSeparateSiblings(); // Optional! - @Nullable final Long startTimeMillis = input.getStartTimeMillis(); // Optional! + // All inputs are optional + @Nullable final Integer start = input.getStart(); + @Nullable final Integer count = input.getCount(); + @Nullable final Boolean separateSiblings = input.getSeparateSiblings(); + @Nullable final Long startTimeMillis = input.getStartTimeMillis(); @Nullable final Long endTimeMillis = - ResolverUtils.getLineageEndTimeMillis( - input.getStartTimeMillis(), input.getEndTimeMillis()); // Optional! 
+ ResolverUtils.getLineageEndTimeMillis(input.getStartTimeMillis(), input.getEndTimeMillis()); + final Boolean includeGhostEntities = + Optional.ofNullable(input.getIncludeGhostEntities()).orElse(false); com.linkedin.metadata.graph.LineageDirection resolvedDirection = com.linkedin.metadata.graph.LineageDirection.valueOf(lineageDirection.toString()); @@ -80,6 +83,8 @@ public CompletableFuture get(DataFetchingEnvironment enviro _siblingGraphService.getLineage( context .getOperationContext() + .withSearchFlags( + searchFlags -> searchFlags.setIncludeSoftDeleted(includeGhostEntities)) .withLineageFlags( flags -> flags @@ -91,6 +96,7 @@ public CompletableFuture get(DataFetchingEnvironment enviro count != null ? count : 100, 1, separateSiblings != null ? input.getSeparateSiblings() : false, + includeGhostEntities, new HashSet<>()); Set restrictedUrns = new HashSet<>(); diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index fd112c9524ac9a..16ef59114f86c8 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -1258,6 +1258,11 @@ input LineageInput { An optional ending time to filter on """ endTimeMillis: Long + + """ + If enabled, include entities that do not exist or are soft deleted. + """ + includeGhostEntities: Boolean = false } """ @@ -2721,6 +2726,11 @@ enum FabricType { Designates review fabrics """ RVW + + """ + Designates sandbox fabrics + """ + SANDBOX } """ diff --git a/datahub-graphql-core/src/main/resources/search.graphql b/datahub-graphql-core/src/main/resources/search.graphql index 32d73845e1ae40..d0f669f05f9598 100644 --- a/datahub-graphql-core/src/main/resources/search.graphql +++ b/datahub-graphql-core/src/main/resources/search.graphql @@ -521,6 +521,11 @@ enum FilterOperator { """ EQUAL + """ + Represent the relation: field = value (case-insensitive), e.g.
platform = HDFS + """ + IEQUAL + """ * Represent the relation: String field is one of the array values to, e.g. name in ["Profile", "Event"] """ @@ -575,6 +580,7 @@ enum FilterOperator { Represent the relation: URN field matches any nested child or parent in addition to the given URN """ RELATED_INCL + } """ diff --git a/datahub-upgrade/build.gradle b/datahub-upgrade/build.gradle index e808f9e87687c0..b783efa09713d1 100644 --- a/datahub-upgrade/build.gradle +++ b/datahub-upgrade/build.gradle @@ -19,6 +19,7 @@ dependencies { implementation project(':metadata-dao-impl:kafka-producer') implementation externalDependency.charle + implementation externalDependency.mustache implementation externalDependency.javaxInject implementation(externalDependency.hadoopClient) { exclude group: 'net.minidev', module: 'json-smart' @@ -83,6 +84,7 @@ dependencies { testImplementation externalDependency.springBootTest testImplementation externalDependency.mockito testImplementation externalDependency.testng + testImplementation 'uk.org.webcompere:system-stubs-testng:2.1.7' testRuntimeOnly externalDependency.logbackClassic constraints { diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeManager.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeManager.java index 14f36e60d75b2d..75d92e38855420 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeManager.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeManager.java @@ -8,7 +8,7 @@ public interface UpgradeManager { /** Register an {@link Upgrade} with the manaager. */ - void register(Upgrade upgrade); + UpgradeManager register(Upgrade upgrade); /** Kick off an {@link Upgrade} by identifier. 
*/ UpgradeResult execute( diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BootstrapMCPConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BootstrapMCPConfig.java new file mode 100644 index 00000000000000..e7987ba449cfc0 --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BootstrapMCPConfig.java @@ -0,0 +1,30 @@ +package com.linkedin.datahub.upgrade.config; + +import com.linkedin.datahub.upgrade.system.bootstrapmcps.BootstrapMCP; +import com.linkedin.metadata.entity.EntityService; +import io.datahubproject.metadata.context.OperationContext; +import java.io.IOException; +import javax.annotation.Nonnull; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Configuration +public class BootstrapMCPConfig { + + @Nonnull + @Value("${systemUpdate.bootstrap.mcpConfig}") + private String bootstrapMCPConfig; + + @Bean(name = "bootstrapMCPNonBlocking") + public BootstrapMCP bootstrapMCPNonBlocking( + final OperationContext opContext, EntityService entityService) throws IOException { + return new BootstrapMCP(opContext, bootstrapMCPConfig, entityService, false); + } + + @Bean(name = "bootstrapMCPBlocking") + public BootstrapMCP bootstrapMCPBlocking( + final OperationContext opContext, EntityService entityService) throws IOException { + return new BootstrapMCP(opContext, bootstrapMCPConfig, entityService, true); + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java index 8b33e4e7c21649..f3a4c47c59f0b7 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java @@ -6,6 +6,7 @@ import 
com.linkedin.datahub.upgrade.system.SystemUpdate; import com.linkedin.datahub.upgrade.system.SystemUpdateBlocking; import com.linkedin.datahub.upgrade.system.SystemUpdateNonBlocking; +import com.linkedin.datahub.upgrade.system.bootstrapmcps.BootstrapMCP; import com.linkedin.datahub.upgrade.system.elasticsearch.steps.DataHubStartupStep; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.kafka.DataHubKafkaProducerFactory; @@ -31,6 +32,7 @@ import io.datahubproject.metadata.services.RestrictedService; import java.util.List; import javax.annotation.Nonnull; +import lombok.NonNull; import lombok.extern.slf4j.Slf4j; import org.apache.avro.generic.IndexedRecord; import org.apache.kafka.clients.producer.KafkaProducer; @@ -54,21 +56,31 @@ public class SystemUpdateConfig { public SystemUpdate systemUpdate( final List blockingSystemUpgrades, final List nonBlockingSystemUpgrades, - final DataHubStartupStep dataHubStartupStep) { - return new SystemUpdate(blockingSystemUpgrades, nonBlockingSystemUpgrades, dataHubStartupStep); + final DataHubStartupStep dataHubStartupStep, + @Qualifier("bootstrapMCPBlocking") @NonNull final BootstrapMCP bootstrapMCPBlocking, + @Qualifier("bootstrapMCPNonBlocking") @NonNull final BootstrapMCP bootstrapMCPNonBlocking) { + return new SystemUpdate( + blockingSystemUpgrades, + nonBlockingSystemUpgrades, + dataHubStartupStep, + bootstrapMCPBlocking, + bootstrapMCPNonBlocking); } @Bean(name = "systemUpdateBlocking") public SystemUpdateBlocking systemUpdateBlocking( final List blockingSystemUpgrades, - final DataHubStartupStep dataHubStartupStep) { - return new SystemUpdateBlocking(blockingSystemUpgrades, List.of(), dataHubStartupStep); + final DataHubStartupStep dataHubStartupStep, + @Qualifier("bootstrapMCPBlocking") @NonNull final BootstrapMCP bootstrapMCPBlocking) { + return new SystemUpdateBlocking( + blockingSystemUpgrades, dataHubStartupStep, bootstrapMCPBlocking); } @Bean(name = 
"systemUpdateNonBlocking") public SystemUpdateNonBlocking systemUpdateNonBlocking( - final List nonBlockingSystemUpgrades) { - return new SystemUpdateNonBlocking(List.of(), nonBlockingSystemUpgrades, null); + final List nonBlockingSystemUpgrades, + @Qualifier("bootstrapMCPNonBlocking") @NonNull final BootstrapMCP bootstrapMCPNonBlocking) { + return new SystemUpdateNonBlocking(nonBlockingSystemUpgrades, bootstrapMCPNonBlocking); } @Value("#{systemEnvironment['DATAHUB_REVISION'] ?: '0'}") diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/impl/DefaultUpgradeContext.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/impl/DefaultUpgradeContext.java index c4cfad53624764..b7b86ca046dca5 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/impl/DefaultUpgradeContext.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/impl/DefaultUpgradeContext.java @@ -24,7 +24,7 @@ public class DefaultUpgradeContext implements UpgradeContext { private final List args; private final Map> parsedArgs; - DefaultUpgradeContext( + public DefaultUpgradeContext( @Nonnull OperationContext opContext, Upgrade upgrade, UpgradeReport report, diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/impl/DefaultUpgradeManager.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/impl/DefaultUpgradeManager.java index 27ba6abbc5ba93..443042049e8856 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/impl/DefaultUpgradeManager.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/impl/DefaultUpgradeManager.java @@ -26,8 +26,9 @@ public class DefaultUpgradeManager implements UpgradeManager { private final Map _upgrades = new HashMap<>(); @Override - public void register(@Nonnull Upgrade upgrade) { + public UpgradeManager register(@Nonnull Upgrade upgrade) { _upgrades.put(upgrade.id(), upgrade); + return this; } @Override diff --git 
a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/SystemUpdate.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/SystemUpdate.java index ad1c6c98fa3fd1..ba5ad4372d93f1 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/SystemUpdate.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/SystemUpdate.java @@ -3,6 +3,7 @@ import com.linkedin.datahub.upgrade.Upgrade; import com.linkedin.datahub.upgrade.UpgradeCleanupStep; import com.linkedin.datahub.upgrade.UpgradeStep; +import com.linkedin.datahub.upgrade.system.bootstrapmcps.BootstrapMCP; import com.linkedin.datahub.upgrade.system.elasticsearch.steps.DataHubStartupStep; import java.util.LinkedList; import java.util.List; @@ -22,7 +23,9 @@ public class SystemUpdate implements Upgrade { public SystemUpdate( @NonNull final List blockingSystemUpgrades, @NonNull final List nonBlockingSystemUpgrades, - @Nullable final DataHubStartupStep dataHubStartupStep) { + @Nullable final DataHubStartupStep dataHubStartupStep, + @Nullable final BootstrapMCP bootstrapMCPBlocking, + @Nullable final BootstrapMCP bootstrapMCPNonBlocking) { steps = new LinkedList<>(); cleanupSteps = new LinkedList<>(); @@ -32,11 +35,23 @@ public SystemUpdate( cleanupSteps.addAll( blockingSystemUpgrades.stream().flatMap(up -> up.cleanupSteps().stream()).toList()); + // bootstrap blocking only + if (bootstrapMCPBlocking != null) { + steps.addAll(bootstrapMCPBlocking.steps()); + cleanupSteps.addAll(bootstrapMCPBlocking.cleanupSteps()); + } + // emit system update message if blocking upgrade(s) present if (dataHubStartupStep != null && !blockingSystemUpgrades.isEmpty()) { steps.add(dataHubStartupStep); } + // bootstrap non-blocking only + if (bootstrapMCPNonBlocking != null) { + steps.addAll(bootstrapMCPNonBlocking.steps()); + cleanupSteps.addAll(bootstrapMCPNonBlocking.cleanupSteps()); + } + // add non-blocking upgrades last 
steps.addAll(nonBlockingSystemUpgrades.stream().flatMap(up -> up.steps().stream()).toList()); cleanupSteps.addAll( diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/SystemUpdateBlocking.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/SystemUpdateBlocking.java index 32841149c467b3..e3b9baffa05688 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/SystemUpdateBlocking.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/SystemUpdateBlocking.java @@ -1,16 +1,16 @@ package com.linkedin.datahub.upgrade.system; +import com.linkedin.datahub.upgrade.system.bootstrapmcps.BootstrapMCP; import com.linkedin.datahub.upgrade.system.elasticsearch.steps.DataHubStartupStep; import java.util.List; import lombok.NonNull; -import org.jetbrains.annotations.Nullable; public class SystemUpdateBlocking extends SystemUpdate { public SystemUpdateBlocking( @NonNull List blockingSystemUpgrades, - @NonNull List nonBlockingSystemUpgrades, - @Nullable DataHubStartupStep dataHubStartupStep) { - super(blockingSystemUpgrades, nonBlockingSystemUpgrades, dataHubStartupStep); + @NonNull DataHubStartupStep dataHubStartupStep, + @NonNull final BootstrapMCP bootstrapMCPBlocking) { + super(blockingSystemUpgrades, List.of(), dataHubStartupStep, bootstrapMCPBlocking, null); } } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/SystemUpdateNonBlocking.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/SystemUpdateNonBlocking.java index 3309babc1f6cf2..fbc84c518b242e 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/SystemUpdateNonBlocking.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/SystemUpdateNonBlocking.java @@ -1,16 +1,14 @@ package com.linkedin.datahub.upgrade.system; -import com.linkedin.datahub.upgrade.system.elasticsearch.steps.DataHubStartupStep; +import 
com.linkedin.datahub.upgrade.system.bootstrapmcps.BootstrapMCP; import java.util.List; import lombok.NonNull; -import org.jetbrains.annotations.Nullable; public class SystemUpdateNonBlocking extends SystemUpdate { public SystemUpdateNonBlocking( - @NonNull List blockingSystemUpgrades, @NonNull List nonBlockingSystemUpgrades, - @Nullable DataHubStartupStep dataHubStartupStep) { - super(blockingSystemUpgrades, nonBlockingSystemUpgrades, dataHubStartupStep); + final BootstrapMCP bootstrapMCPNonBlocking) { + super(List.of(), nonBlockingSystemUpgrades, null, null, bootstrapMCPNonBlocking); } } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/BootstrapMCP.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/BootstrapMCP.java new file mode 100644 index 00000000000000..6264ee6076e6f8 --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/BootstrapMCP.java @@ -0,0 +1,38 @@ +package com.linkedin.datahub.upgrade.system.bootstrapmcps; + +import com.google.common.collect.ImmutableList; +import com.linkedin.datahub.upgrade.Upgrade; +import com.linkedin.datahub.upgrade.UpgradeStep; +import com.linkedin.metadata.entity.EntityService; +import io.datahubproject.metadata.context.OperationContext; +import java.io.IOException; +import java.util.List; +import javax.annotation.Nullable; + +public class BootstrapMCP implements Upgrade { + private final List _steps; + + public BootstrapMCP( + OperationContext opContext, + @Nullable String bootstrapMCPConfig, + EntityService entityService, + boolean isBlocking) + throws IOException { + if (bootstrapMCPConfig != null && !bootstrapMCPConfig.isEmpty()) { + _steps = + BootstrapMCPUtil.generateSteps(opContext, isBlocking, bootstrapMCPConfig, entityService); + } else { + _steps = ImmutableList.of(); + } + } + + @Override + public String id() { + return getClass().getSimpleName(); + } + + @Override + public List steps() { + 
return _steps; + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/BootstrapMCPStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/BootstrapMCPStep.java new file mode 100644 index 00000000000000..07835ecd6b3e29 --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/BootstrapMCPStep.java @@ -0,0 +1,95 @@ +package com.linkedin.datahub.upgrade.system.bootstrapmcps; + +import static com.linkedin.metadata.Constants.DATA_HUB_UPGRADE_RESULT_ASPECT_NAME; + +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.upgrade.UpgradeContext; +import com.linkedin.datahub.upgrade.UpgradeStep; +import com.linkedin.datahub.upgrade.UpgradeStepResult; +import com.linkedin.datahub.upgrade.impl.DefaultUpgradeStepResult; +import com.linkedin.datahub.upgrade.system.bootstrapmcps.model.BootstrapMCPConfigFile; +import com.linkedin.metadata.aspect.batch.AspectsBatch; +import com.linkedin.metadata.boot.BootstrapStep; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.upgrade.DataHubUpgradeState; +import io.datahubproject.metadata.context.OperationContext; +import java.io.IOException; +import java.util.List; +import java.util.function.Function; +import lombok.Getter; +import lombok.extern.slf4j.Slf4j; + +/** + * This bootstrap step is responsible for upgrading DataHub policy documents with new searchable + * fields in ES + */ +@Slf4j +public class BootstrapMCPStep implements UpgradeStep { + private final String upgradeId; + private final Urn upgradeIdUrn; + + private final OperationContext opContext; + private final EntityService entityService; + @Getter private final BootstrapMCPConfigFile.MCPTemplate mcpTemplate; + + public BootstrapMCPStep( + OperationContext opContext, + EntityService entityService, + BootstrapMCPConfigFile.MCPTemplate mcpTemplate) { + this.opContext = opContext; + this.entityService = entityService; + 
this.mcpTemplate = mcpTemplate; + this.upgradeId = + String.join("-", List.of("bootstrap", mcpTemplate.getName(), mcpTemplate.getVersion())); + this.upgradeIdUrn = BootstrapStep.getUpgradeUrn(this.upgradeId); + } + + @Override + public String id() { + return upgradeId; + } + + @Override + public Function executable() { + return (context) -> { + try { + AspectsBatch batch = BootstrapMCPUtil.generateAspectBatch(opContext, mcpTemplate); + log.info("Ingesting {} MCPs", batch.getItems().size()); + entityService.ingestProposal(opContext, batch, mcpTemplate.isAsync()); + } catch (IOException e) { + log.error("Error bootstrapping MCPs", e); + return new DefaultUpgradeStepResult(id(), DataHubUpgradeState.FAILED); + } + + BootstrapStep.setUpgradeResult(context.opContext(), upgradeIdUrn, entityService); + + return new DefaultUpgradeStepResult(id(), DataHubUpgradeState.SUCCEEDED); + }; + } + + /** + * Returns whether the upgrade should proceed if the step fails after exceeding the maximum + * retries. + */ + @Override + public boolean isOptional() { + return mcpTemplate.isOptional(); + } + + /** Returns whether the upgrade should be skipped. */ + @Override + public boolean skip(UpgradeContext context) { + if (!mcpTemplate.isForce()) { + boolean previouslyRun = + entityService.exists( + context.opContext(), upgradeIdUrn, DATA_HUB_UPGRADE_RESULT_ASPECT_NAME, true); + if (previouslyRun) { + log.info("{} was already run. 
Skipping.", id()); + } + return previouslyRun; + } else { + log.info("{} forced run.", id()); + return false; + } + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/BootstrapMCPUtil.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/BootstrapMCPUtil.java new file mode 100644 index 00000000000000..b8b7e828c16c6b --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/BootstrapMCPUtil.java @@ -0,0 +1,183 @@ +package com.linkedin.datahub.upgrade.system.bootstrapmcps; + +import static com.linkedin.metadata.Constants.INGESTION_INFO_ASPECT_NAME; + +import com.datahub.util.RecordUtils; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.github.mustachejava.DefaultMustacheFactory; +import com.github.mustachejava.Mustache; +import com.github.mustachejava.MustacheFactory; +import com.linkedin.common.AuditStamp; +import com.linkedin.datahub.upgrade.UpgradeStep; +import com.linkedin.datahub.upgrade.system.bootstrapmcps.model.BootstrapMCPConfigFile; +import com.linkedin.metadata.aspect.batch.AspectsBatch; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; +import com.linkedin.metadata.utils.AuditStampUtils; +import com.linkedin.metadata.utils.GenericRecordUtils; +import com.linkedin.mxe.GenericAspect; +import com.linkedin.mxe.MetadataChangeProposal; +import io.datahubproject.metadata.context.OperationContext; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.StringReader; +import java.io.StringWriter; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import 
lombok.extern.slf4j.Slf4j; +import org.apache.commons.io.IOUtils; +import org.springframework.core.io.ClassPathResource; +import org.springframework.core.io.FileSystemResource; + +@Slf4j +public class BootstrapMCPUtil { + static final MustacheFactory MUSTACHE_FACTORY = new DefaultMustacheFactory(); + + private BootstrapMCPUtil() {} + + static List generateSteps( + @Nonnull OperationContext opContext, + boolean isBlocking, + @Nonnull String bootstrapMCPConfig, + @Nonnull EntityService entityService) + throws IOException { + List steps = + resolveYamlConf(opContext, bootstrapMCPConfig, BootstrapMCPConfigFile.class) + .getBootstrap() + .getTemplates() + .stream() + .filter(cfg -> cfg.isBlocking() == isBlocking) + .map(cfg -> new BootstrapMCPStep(opContext, entityService, cfg)) + .collect(Collectors.toList()); + + log.info( + "Generated {} {} BootstrapMCP steps", + steps.size(), + isBlocking ? "blocking" : "non-blocking"); + return steps; + } + + static AspectsBatch generateAspectBatch( + OperationContext opContext, BootstrapMCPConfigFile.MCPTemplate mcpTemplate) + throws IOException { + + final AuditStamp auditStamp = AuditStampUtils.createDefaultAuditStamp(); + + List mcps = + resolveMCPTemplate(opContext, mcpTemplate, auditStamp).stream() + .map( + mcpObjectNode -> { + ObjectNode aspect = (ObjectNode) mcpObjectNode.remove("aspect"); + + MetadataChangeProposal mcp = + opContext + .getObjectMapper() + .convertValue(mcpObjectNode, MetadataChangeProposal.class); + + try { + String jsonAspect = + opContext + .getObjectMapper() + .writeValueAsString( + convenienceConversions(opContext, mcp.getAspectName(), aspect)); + GenericAspect genericAspect = GenericRecordUtils.serializeAspect(jsonAspect); + mcp.setAspect(genericAspect); + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + + return mcp; + }) + .collect(Collectors.toList()); + + return AspectsBatchImpl.builder() + .mcps(mcps, auditStamp, opContext.getRetrieverContext().get()) + 
.retrieverContext(opContext.getRetrieverContext().get()) + .build(); + } + + static List resolveMCPTemplate( + OperationContext opContext, + BootstrapMCPConfigFile.MCPTemplate mcpTemplate, + AuditStamp auditStamp) + throws IOException { + + String template = loadTemplate(mcpTemplate.getMcps_location()); + Mustache mustache = MUSTACHE_FACTORY.compile(new StringReader(template), mcpTemplate.getName()); + Map scopeValues = resolveValues(opContext, mcpTemplate, auditStamp); + StringWriter writer = new StringWriter(); + mustache.execute(writer, scopeValues); + + return opContext.getYamlMapper().readValue(writer.toString(), new TypeReference<>() {}); + } + + static Map resolveValues( + OperationContext opContext, + BootstrapMCPConfigFile.MCPTemplate mcpTemplate, + AuditStamp auditStamp) + throws IOException { + final Map scopeValues = new HashMap<>(); + + // built-in + scopeValues.put("auditStamp", RecordUtils.toJsonString(auditStamp)); + + if (mcpTemplate.getValues_env() != null + && !mcpTemplate.getValues_env().isEmpty() + && System.getenv().containsKey(mcpTemplate.getValues_env())) { + String envValue = System.getenv(mcpTemplate.getValues_env()); + scopeValues.putAll(opContext.getObjectMapper().readValue(envValue, new TypeReference<>() {})); + } + return scopeValues; + } + + private static String loadTemplate(String source) throws IOException { + log.info("Loading MCP template {}", source); + try (InputStream stream = new ClassPathResource(source).getInputStream()) { + log.info("Found in classpath: {}", source); + return IOUtils.toString(stream, StandardCharsets.UTF_8); + } catch (FileNotFoundException e) { + log.info("{} was NOT found in the classpath.", source); + try (InputStream stream = new FileSystemResource(source).getInputStream()) { + log.info("Found in filesystem: {}", source); + return IOUtils.toString(stream, StandardCharsets.UTF_8); + } catch (Exception e2) { + throw new IllegalArgumentException(String.format("Could not resolve %s", source)); + } + } + } 
+ + static T resolveYamlConf(OperationContext opContext, String source, Class clazz) + throws IOException { + log.info("Resolving {} to {}", source, clazz.getSimpleName()); + try (InputStream stream = new ClassPathResource(source).getInputStream()) { + log.info("Found in classpath: {}", source); + return opContext.getYamlMapper().readValue(stream, clazz); + } catch (FileNotFoundException e) { + log.info("{} was NOT found in the classpath.", source); + try (InputStream stream = new FileSystemResource(source).getInputStream()) { + log.info("Found in filesystem: {}", source); + return opContext.getYamlMapper().readValue(stream, clazz); + } catch (Exception e2) { + throw new IllegalArgumentException(String.format("Could not resolve %s", source)); + } + } + } + + private static ObjectNode convenienceConversions( + OperationContext opContext, String aspectName, ObjectNode aspectObjectNode) + throws JsonProcessingException { + if (INGESTION_INFO_ASPECT_NAME.equals(aspectName)) { + ObjectNode config = (ObjectNode) aspectObjectNode.get("config"); + ObjectNode recipe = (ObjectNode) config.remove("recipe"); + config.put("recipe", opContext.getObjectMapper().writeValueAsString(recipe)); + } + return aspectObjectNode; + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/model/BootstrapMCPConfigFile.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/model/BootstrapMCPConfigFile.java new file mode 100644 index 00000000000000..8fd3dd7c7d8975 --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/model/BootstrapMCPConfigFile.java @@ -0,0 +1,40 @@ +package com.linkedin.datahub.upgrade.system.bootstrapmcps.model; + +import java.util.List; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@AllArgsConstructor 
+@NoArgsConstructor +@Data +@Builder +public class BootstrapMCPConfigFile { + private Bootstrap bootstrap; + + @AllArgsConstructor + @NoArgsConstructor + @Data + @Builder + public static class Bootstrap { + private List templates; + } + + @AllArgsConstructor + @NoArgsConstructor + @Data + @Builder + public static class MCPTemplate { + @Nonnull private String name; + @Nonnull private String version; + @Builder.Default private boolean force = false; + @Builder.Default private boolean blocking = false; + @Builder.Default private boolean async = true; + @Builder.Default private boolean optional = false; + @Nonnull private String mcps_location; + @Nullable private String values_env; + } +} diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeBlockingTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeBlockingTest.java new file mode 100644 index 00000000000000..0672061c665c1a --- /dev/null +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeBlockingTest.java @@ -0,0 +1,48 @@ +package com.linkedin.datahub.upgrade; + +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; +import static org.testng.AssertJUnit.assertNotNull; + +import com.linkedin.datahub.upgrade.system.SystemUpdateBlocking; +import com.linkedin.datahub.upgrade.system.bootstrapmcps.BootstrapMCPStep; +import java.util.List; +import java.util.stream.Collectors; +import javax.inject.Named; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; +import org.testng.annotations.Test; + +@ActiveProfiles("test") +@SpringBootTest( + classes = {UpgradeCliApplication.class, UpgradeCliApplicationTestConfiguration.class}, + args = {"-u", "SystemUpdateBlocking"}) +public class 
DatahubUpgradeBlockingTest extends AbstractTestNGSpringContextTests { + + @Autowired + @Named("systemUpdateBlocking") + private SystemUpdateBlocking systemUpdateBlocking; + + @Test + public void testNBlockingBootstrapMCP() { + assertNotNull(systemUpdateBlocking); + + List mcpTemplate = + systemUpdateBlocking.steps().stream() + .filter(update -> update instanceof BootstrapMCPStep) + .map(update -> (BootstrapMCPStep) update) + .toList(); + + assertFalse(mcpTemplate.isEmpty()); + assertTrue( + mcpTemplate.stream().allMatch(update -> update.getMcpTemplate().isBlocking()), + String.format( + "Found non-blocking step (expected blocking only): %s", + mcpTemplate.stream() + .filter(update -> !update.getMcpTemplate().isBlocking()) + .map(update -> update.getMcpTemplate().getName()) + .collect(Collectors.toSet()))); + } +} diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java index df27d33f3a117e..845d8185273432 100644 --- a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java @@ -5,10 +5,13 @@ import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; import static org.testng.AssertJUnit.assertNotNull; import com.linkedin.datahub.upgrade.impl.DefaultUpgradeManager; import com.linkedin.datahub.upgrade.system.SystemUpdateNonBlocking; +import com.linkedin.datahub.upgrade.system.bootstrapmcps.BootstrapMCPStep; import com.linkedin.datahub.upgrade.system.graph.vianodes.ReindexDataJobViaNodesCLL; import com.linkedin.metadata.boot.kafka.MockSystemUpdateDeserializer; import com.linkedin.metadata.boot.kafka.MockSystemUpdateSerializer; @@ 
-22,6 +25,7 @@ import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.test.metadata.context.TestOperationContexts; import java.util.List; +import java.util.stream.Collectors; import javax.inject.Named; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; @@ -41,7 +45,7 @@ args = {"-u", "SystemUpdateNonBlocking"}) public class DatahubUpgradeNonBlockingTest extends AbstractTestNGSpringContextTests { - @Autowired(required = false) + @Autowired @Named("systemUpdateNonBlocking") private SystemUpdateNonBlocking systemUpdateNonBlocking; @@ -84,8 +88,7 @@ public void testReindexDataJobViaNodesCLLPaging() { ReindexDataJobViaNodesCLL cllUpgrade = new ReindexDataJobViaNodesCLL(opContext, mockService, mockAspectDao, true, 10, 0, 0); - SystemUpdateNonBlocking upgrade = - new SystemUpdateNonBlocking(List.of(), List.of(cllUpgrade), null); + SystemUpdateNonBlocking upgrade = new SystemUpdateNonBlocking(List.of(cllUpgrade), null); DefaultUpgradeManager manager = new DefaultUpgradeManager(); manager.register(upgrade); manager.execute( @@ -101,4 +104,23 @@ public void testReindexDataJobViaNodesCLLPaging() { .aspectName("dataJobInputOutput") .urnLike("urn:li:dataJob:%"))); } + + @Test + public void testNonBlockingBootstrapMCP() { + List mcpTemplate = + systemUpdateNonBlocking.steps().stream() + .filter(update -> update instanceof BootstrapMCPStep) + .map(update -> (BootstrapMCPStep) update) + .toList(); + + assertFalse(mcpTemplate.isEmpty()); + assertTrue( + mcpTemplate.stream().noneMatch(update -> update.getMcpTemplate().isBlocking()), + String.format( + "Found blocking step: %s (expected non-blocking only)", + mcpTemplate.stream() + .filter(update -> update.getMcpTemplate().isBlocking()) + .map(update -> update.getMcpTemplate().getName()) + .collect(Collectors.toSet()))); + } } diff --git 
a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/BootstrapMCPUtilTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/BootstrapMCPUtilTest.java new file mode 100644 index 00000000000000..68023a084bbd20 --- /dev/null +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/BootstrapMCPUtilTest.java @@ -0,0 +1,224 @@ +package com.linkedin.datahub.upgrade.system.bootstrapmcps; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.upgrade.system.bootstrapmcps.model.BootstrapMCPConfigFile; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.ingestion.DataHubIngestionSourceInfo; +import com.linkedin.metadata.aspect.batch.AspectsBatch; +import com.linkedin.metadata.aspect.batch.MCPItem; +import com.linkedin.metadata.utils.AuditStampUtils; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.test.metadata.context.TestOperationContexts; +import java.io.IOException; +import java.util.List; +import org.testng.annotations.Listeners; +import org.testng.annotations.Test; +import uk.org.webcompere.systemstubs.environment.EnvironmentVariables; +import uk.org.webcompere.systemstubs.testng.SystemStub; +import uk.org.webcompere.systemstubs.testng.SystemStubsListener; + +@Listeners(SystemStubsListener.class) +public class BootstrapMCPUtilTest { + static final OperationContext OP_CONTEXT = + TestOperationContexts.systemContextNoSearchAuthorization(); + private static final String DATAHUB_TEST_VALUES_ENV = "DATAHUB_TEST_VALUES_ENV"; + private static final AuditStamp TEST_AUDIT_STAMP = AuditStampUtils.createDefaultAuditStamp(); + + @SystemStub private EnvironmentVariables 
environmentVariables; + + @Test + public void testResolveYamlConf() throws IOException { + BootstrapMCPConfigFile initConfig = + BootstrapMCPUtil.resolveYamlConf( + OP_CONTEXT, "bootstrapmcp/test.yaml", BootstrapMCPConfigFile.class); + assertEquals(initConfig.getBootstrap().getTemplates().size(), 1); + + BootstrapMCPConfigFile.MCPTemplate template = initConfig.getBootstrap().getTemplates().get(0); + assertEquals(template.getName(), "datahub-test"); + assertEquals(template.getVersion(), "v10"); + assertFalse(template.isForce()); + assertFalse(template.isBlocking()); + assertTrue(template.isAsync()); + assertFalse(template.isOptional()); + assertEquals(template.getMcps_location(), "bootstrapmcp/datahub-test-mcp.yaml"); + assertEquals(template.getValues_env(), "DATAHUB_TEST_VALUES_ENV"); + } + + @Test + public void testResolveMCPTemplateDefaults() throws IOException { + environmentVariables.remove(DATAHUB_TEST_VALUES_ENV); + + BootstrapMCPConfigFile.MCPTemplate template = + BootstrapMCPUtil.resolveYamlConf( + OP_CONTEXT, "bootstrapmcp/test.yaml", BootstrapMCPConfigFile.class) + .getBootstrap() + .getTemplates() + .get(0); + + List mcpObjectNodes = + BootstrapMCPUtil.resolveMCPTemplate(OP_CONTEXT, template, TEST_AUDIT_STAMP); + assertEquals(mcpObjectNodes.size(), 1); + + ObjectNode mcp = mcpObjectNodes.get(0); + assertEquals(mcp.get("entityType").asText(), "dataHubIngestionSource"); + assertEquals(mcp.get("entityUrn").asText(), "urn:li:dataHubIngestionSource:datahub-test"); + assertEquals(mcp.get("aspectName").asText(), "dataHubIngestionSourceInfo"); + assertEquals(mcp.get("changeType").asText(), "UPSERT"); + + ObjectNode aspect = (ObjectNode) mcp.get("aspect"); + assertEquals(aspect.get("type").asText(), "datahub-gc"); + assertEquals(aspect.get("name").asText(), "datahub-test"); + + ObjectNode schedule = (ObjectNode) aspect.get("schedule"); + assertEquals(schedule.get("timezone").asText(), "UTC"); + assertEquals(schedule.get("interval").asText(), "0 0 * * *"); + + 
ObjectNode config = (ObjectNode) aspect.get("config"); + assertTrue(config.get("extraArgs").isObject()); + assertTrue(config.get("debugMode").isBoolean()); + assertEquals(config.get("executorId").asText(), "default"); + + ObjectNode recipe = (ObjectNode) config.get("recipe"); + ObjectNode source = (ObjectNode) recipe.get("source"); + assertEquals(source.get("type").asText(), "datahub-gc"); + + ObjectNode sourceConfig = (ObjectNode) source.get("config"); + assertFalse(sourceConfig.get("cleanup_expired_tokens").asBoolean()); + assertTrue(sourceConfig.get("truncate_indices").asBoolean()); + + ObjectNode dataprocessCleanup = (ObjectNode) sourceConfig.get("dataprocess_cleanup"); + assertEquals(dataprocessCleanup.get("retention_days").asInt(), 10); + assertTrue(dataprocessCleanup.get("delete_empty_data_jobs").asBoolean()); + assertTrue(dataprocessCleanup.get("delete_empty_data_flows").asBoolean()); + assertFalse(dataprocessCleanup.get("hard_delete_entities").asBoolean()); + assertEquals(dataprocessCleanup.get("keep_last_n").asInt(), 5); + + ObjectNode softDeletedEntitiesCleanup = + (ObjectNode) sourceConfig.get("soft_deleted_entities_cleanup"); + assertEquals(softDeletedEntitiesCleanup.get("retention_days").asInt(), 10); + + assertTrue(mcp.get("headers").isObject()); + } + + @Test + public void testResolveMCPTemplateOverride() throws IOException { + environmentVariables.set( + "DATAHUB_TEST_VALUES_ENV", + "{\n" + + " \"ingestion\": {\n" + + " \"name\": \"name-override\"\n" + + " },\n" + + " \"schedule\": {\n" + + " \"timezone\": \"America/Chicago\",\n" + + " \"interval\": \"9 9 * * *\"\n" + + " },\n" + + " \"cleanup_expired_tokens\": true,\n" + + " \"truncate_indices\": false,\n" + + " \"dataprocess_cleanup\": {\n" + + " \"retention_days\": 99,\n" + + " \"delete_empty_data_jobs\": false,\n" + + " \"delete_empty_data_flows\": false,\n" + + " \"hard_delete_entities\": true,\n" + + " \"keep_last_n\": 50\n" + + " },\n" + + " \"soft_deleted_entities_cleanup\": {\n" + + " 
\"retention_days\": 100\n" + + " }\n" + + "}"); + + BootstrapMCPConfigFile.MCPTemplate template = + BootstrapMCPUtil.resolveYamlConf( + OP_CONTEXT, "bootstrapmcp/test.yaml", BootstrapMCPConfigFile.class) + .getBootstrap() + .getTemplates() + .get(0); + + List mcpObjectNodes = + BootstrapMCPUtil.resolveMCPTemplate(OP_CONTEXT, template, TEST_AUDIT_STAMP); + assertEquals(mcpObjectNodes.size(), 1); + + ObjectNode mcp = mcpObjectNodes.get(0); + assertEquals(mcp.get("entityType").asText(), "dataHubIngestionSource"); + assertEquals(mcp.get("entityUrn").asText(), "urn:li:dataHubIngestionSource:datahub-test"); + assertEquals(mcp.get("aspectName").asText(), "dataHubIngestionSourceInfo"); + assertEquals(mcp.get("changeType").asText(), "UPSERT"); + + ObjectNode aspect = (ObjectNode) mcp.get("aspect"); + assertEquals(aspect.get("type").asText(), "datahub-gc"); + assertEquals(aspect.get("name").asText(), "name-override"); + + ObjectNode schedule = (ObjectNode) aspect.get("schedule"); + assertEquals(schedule.get("timezone").asText(), "America/Chicago"); + assertEquals(schedule.get("interval").asText(), "9 9 * * *"); + + ObjectNode config = (ObjectNode) aspect.get("config"); + assertTrue(config.get("extraArgs").isObject()); + assertTrue(config.get("debugMode").isBoolean()); + assertEquals(config.get("executorId").asText(), "default"); + + ObjectNode recipe = (ObjectNode) config.get("recipe"); + ObjectNode source = (ObjectNode) recipe.get("source"); + assertEquals(source.get("type").asText(), "datahub-gc"); + + ObjectNode sourceConfig = (ObjectNode) source.get("config"); + assertTrue(sourceConfig.get("cleanup_expired_tokens").asBoolean()); + assertFalse(sourceConfig.get("truncate_indices").asBoolean()); + + ObjectNode dataprocessCleanup = (ObjectNode) sourceConfig.get("dataprocess_cleanup"); + assertEquals(dataprocessCleanup.get("retention_days").asInt(), 99); + assertFalse(dataprocessCleanup.get("delete_empty_data_jobs").asBoolean()); + 
assertFalse(dataprocessCleanup.get("delete_empty_data_flows").asBoolean()); + assertTrue(dataprocessCleanup.get("hard_delete_entities").asBoolean()); + assertEquals(dataprocessCleanup.get("keep_last_n").asInt(), 50); + + ObjectNode softDeletedEntitiesCleanup = + (ObjectNode) sourceConfig.get("soft_deleted_entities_cleanup"); + assertEquals(softDeletedEntitiesCleanup.get("retention_days").asInt(), 100); + + assertTrue(mcp.get("headers").isObject()); + } + + @Test + public void testMCPBatch() throws IOException { + environmentVariables.remove(DATAHUB_TEST_VALUES_ENV); + + BootstrapMCPConfigFile.MCPTemplate template = + BootstrapMCPUtil.resolveYamlConf( + OP_CONTEXT, "bootstrapmcp/test.yaml", BootstrapMCPConfigFile.class) + .getBootstrap() + .getTemplates() + .get(0); + + AspectsBatch batch = BootstrapMCPUtil.generateAspectBatch(OP_CONTEXT, template); + assertEquals(batch.getMCPItems().size(), 1); + + MCPItem item = batch.getMCPItems().get(0); + assertEquals(item.getUrn(), UrnUtils.getUrn("urn:li:dataHubIngestionSource:datahub-test")); + assertEquals(item.getAspectName(), "dataHubIngestionSourceInfo"); + assertEquals(item.getChangeType(), ChangeType.UPSERT); + + DataHubIngestionSourceInfo ingestionSource = item.getAspect(DataHubIngestionSourceInfo.class); + + assertEquals(ingestionSource.getName(), "datahub-test"); + assertEquals(ingestionSource.getType(), "datahub-gc"); + + assertFalse(ingestionSource.getConfig().isDebugMode()); + assertEquals(ingestionSource.getConfig().getExecutorId(), "default"); + + assertEquals(ingestionSource.getSchedule().getTimezone(), "UTC"); + assertEquals(ingestionSource.getSchedule().getInterval(), "0 0 * * *"); + + assertEquals( + OP_CONTEXT.getObjectMapper().readTree(ingestionSource.getConfig().getRecipe()), + OP_CONTEXT + .getObjectMapper() + .readTree( + 
"{\"source\":{\"type\":\"datahub-gc\",\"config\":{\"cleanup_expired_tokens\":false,\"truncate_indices\":true,\"dataprocess_cleanup\":{\"retention_days\":10,\"delete_empty_data_jobs\":true,\"delete_empty_data_flows\":true,\"hard_delete_entities\":false,\"keep_last_n\":5},\"soft_deleted_entities_cleanup\":{\"retention_days\":10}}}}")); + } +} diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/DataTypesTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/DataTypesTest.java new file mode 100644 index 00000000000000..156b5347e544da --- /dev/null +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/DataTypesTest.java @@ -0,0 +1,79 @@ +package com.linkedin.datahub.upgrade.system.bootstrapmcps; + +import static com.linkedin.datahub.upgrade.system.bootstrapmcps.BootstrapMCPUtilTest.OP_CONTEXT; +import static com.linkedin.metadata.Constants.*; +import static org.mockito.Mockito.*; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; + +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.upgrade.Upgrade; +import com.linkedin.datahub.upgrade.UpgradeManager; +import com.linkedin.datahub.upgrade.UpgradeResult; +import com.linkedin.datahub.upgrade.impl.DefaultUpgradeManager; +import com.linkedin.datatype.DataTypeInfo; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; +import com.linkedin.upgrade.DataHubUpgradeState; +import io.datahubproject.metadata.context.OperationContext; +import java.io.IOException; +import java.util.List; +import org.testng.annotations.Test; + +public class DataTypesTest { + + private static final Urn TEST_DATA_TYPE_URN = UrnUtils.getUrn("urn:li:dataType:datahub.test"); + + @Test + public void testExecuteValidDataTypesNoExistingDataTypes() throws Exception { + final EntityService 
entityService = mock(EntityService.class); + final UpgradeManager upgradeManager = + loadContext("bootstrapmcp_datatypes/test_valid.yaml", entityService); + + // run the upgrade + upgradeManager.execute(OP_CONTEXT, "BootstrapMCP", List.of()); + + DataTypeInfo expectedResult = new DataTypeInfo(); + expectedResult.setDescription("Test Description"); + expectedResult.setDisplayName("Test Name"); + expectedResult.setQualifiedName("datahub.test"); + + verify(entityService, times(1)) + .ingestProposal(any(OperationContext.class), any(AspectsBatchImpl.class), eq(true)); + } + + @Test + public void testExecuteInvalidJson() throws Exception { + final EntityService entityService = mock(EntityService.class); + final UpgradeManager upgradeManager = + loadContext("bootstrapmcp_datatypes/test_invalid.yaml", entityService); + + UpgradeResult upgradeResult = upgradeManager.execute(OP_CONTEXT, "BootstrapMCP", List.of()); + + assertEquals(upgradeResult.result(), DataHubUpgradeState.FAILED); + + // verify expected existence check + verify(entityService) + .exists( + any(OperationContext.class), + eq(UrnUtils.getUrn("urn:li:dataHubUpgrade:bootstrap-data-types-v1")), + eq("dataHubUpgradeResult"), + anyBoolean()); + + // Verify no additional interactions + verifyNoMoreInteractions(entityService); + } + + private static UpgradeManager loadContext(String configFile, EntityService entityService) + throws IOException { + // hasn't run + when(entityService.exists( + any(OperationContext.class), any(Urn.class), eq("dataHubUpgradeResult"), anyBoolean())) + .thenReturn(false); + + Upgrade bootstrapUpgrade = new BootstrapMCP(OP_CONTEXT, configFile, entityService, false); + assertFalse(bootstrapUpgrade.steps().isEmpty()); + return new DefaultUpgradeManager().register(bootstrapUpgrade); + } +} diff --git a/datahub-upgrade/src/test/resources/bootstrapmcp/datahub-test-mcp.yaml b/datahub-upgrade/src/test/resources/bootstrapmcp/datahub-test-mcp.yaml new file mode 100644 index 
00000000000000..d049a807ac1d88 --- /dev/null +++ b/datahub-upgrade/src/test/resources/bootstrapmcp/datahub-test-mcp.yaml @@ -0,0 +1,29 @@ +- entityType: dataHubIngestionSource + entityUrn: urn:li:dataHubIngestionSource:datahub-test + aspectName: dataHubIngestionSourceInfo + changeType: UPSERT + aspect: + type: 'datahub-gc' + name: '{{ingestion.name}}{{^ingestion.name}}datahub-test{{/ingestion.name}}' + schedule: + timezone: '{{schedule.timezone}}{{^schedule.timezone}}UTC{{/schedule.timezone}}' + interval: '{{schedule.interval}}{{^schedule.interval}}0 0 * * *{{/schedule.interval}}' + config: + recipe: + source: + type: 'datahub-gc' + config: + cleanup_expired_tokens: {{cleanup_expired_tokens}}{{^cleanup_expired_tokens}}false{{/cleanup_expired_tokens}} + truncate_indices: {{truncate_indices}}{{^truncate_indices}}true{{/truncate_indices}} + dataprocess_cleanup: + retention_days: {{dataprocess_cleanup.retention_days}}{{^dataprocess_cleanup.retention_days}}10{{/dataprocess_cleanup.retention_days}} + delete_empty_data_jobs: {{dataprocess_cleanup.delete_empty_data_jobs}}{{^dataprocess_cleanup.delete_empty_data_jobs}}true{{/dataprocess_cleanup.delete_empty_data_jobs}} + delete_empty_data_flows: {{dataprocess_cleanup.delete_empty_data_flows}}{{^dataprocess_cleanup.delete_empty_data_flows}}true{{/dataprocess_cleanup.delete_empty_data_flows}} + hard_delete_entities: {{dataprocess_cleanup.hard_delete_entities}}{{^dataprocess_cleanup.hard_delete_entities}}false{{/dataprocess_cleanup.hard_delete_entities}} + keep_last_n: {{dataprocess_cleanup.keep_last_n}}{{^dataprocess_cleanup.keep_last_n}}5{{/dataprocess_cleanup.keep_last_n}} + soft_deleted_entities_cleanup: + retention_days: {{soft_deleted_entities_cleanup.retention_days}}{{^soft_deleted_entities_cleanup.retention_days}}10{{/soft_deleted_entities_cleanup.retention_days}} + extraArgs: {} + debugMode: false + executorId: default + headers: {} \ No newline at end of file diff --git 
a/datahub-upgrade/src/test/resources/bootstrapmcp/test.yaml b/datahub-upgrade/src/test/resources/bootstrapmcp/test.yaml new file mode 100644 index 00000000000000..649cc09632fc2a --- /dev/null +++ b/datahub-upgrade/src/test/resources/bootstrapmcp/test.yaml @@ -0,0 +1,9 @@ +bootstrap: + templates: + - name: datahub-test + version: v10 + # force: false + # blocking: false + # async: true + mcps_location: "bootstrapmcp/datahub-test-mcp.yaml" + values_env: "DATAHUB_TEST_VALUES_ENV" \ No newline at end of file diff --git a/datahub-upgrade/src/test/resources/bootstrapmcp_datatypes/test_data_types_invalid.yaml b/datahub-upgrade/src/test/resources/bootstrapmcp_datatypes/test_data_types_invalid.yaml new file mode 100644 index 00000000000000..4d4970e510380a --- /dev/null +++ b/datahub-upgrade/src/test/resources/bootstrapmcp_datatypes/test_data_types_invalid.yaml @@ -0,0 +1,8 @@ +- entityUrn: urn:li:dataType:datahub.test + entityType: dataType + aspectName: dataTypeInfo + changeType: UPSERT + aspect: + badField: + qualifiedName: datahub.test + description: Test Description \ No newline at end of file diff --git a/datahub-upgrade/src/test/resources/bootstrapmcp_datatypes/test_data_types_valid.yaml b/datahub-upgrade/src/test/resources/bootstrapmcp_datatypes/test_data_types_valid.yaml new file mode 100644 index 00000000000000..902315ab85dc8c --- /dev/null +++ b/datahub-upgrade/src/test/resources/bootstrapmcp_datatypes/test_data_types_valid.yaml @@ -0,0 +1,8 @@ +- entityUrn: urn:li:dataType:datahub.test + entityType: dataType + aspectName: dataTypeInfo + changeType: UPSERT + aspect: + qualifiedName: datahub.test + displayName: Test Name + description: Test Description \ No newline at end of file diff --git a/datahub-upgrade/src/test/resources/bootstrapmcp_datatypes/test_invalid.yaml b/datahub-upgrade/src/test/resources/bootstrapmcp_datatypes/test_invalid.yaml new file mode 100644 index 00000000000000..07654ff3c299ee --- /dev/null +++ 
b/datahub-upgrade/src/test/resources/bootstrapmcp_datatypes/test_invalid.yaml @@ -0,0 +1,5 @@ +bootstrap: + templates: + - name: data-types + version: v1 + mcps_location: "bootstrapmcp_datatypes/test_data_types_invalid.yaml" \ No newline at end of file diff --git a/datahub-upgrade/src/test/resources/bootstrapmcp_datatypes/test_valid.yaml b/datahub-upgrade/src/test/resources/bootstrapmcp_datatypes/test_valid.yaml new file mode 100644 index 00000000000000..05b769d22ddf37 --- /dev/null +++ b/datahub-upgrade/src/test/resources/bootstrapmcp_datatypes/test_valid.yaml @@ -0,0 +1,5 @@ +bootstrap: + templates: + - name: data-types + version: v1 + mcps_location: "bootstrapmcp_datatypes/test_data_types_valid.yaml" \ No newline at end of file diff --git a/datahub-web-react/yarn.lock b/datahub-web-react/yarn.lock index 8d5899d9891f18..9dc563c958dd19 100644 --- a/datahub-web-react/yarn.lock +++ b/datahub-web-react/yarn.lock @@ -5964,9 +5964,9 @@ fast-levenshtein@^2.0.6: integrity sha1-PYpcZog6FqMMqGQ+hR8Zuqd5eRc= fast-loops@^1.1.3: - version "1.1.3" - resolved "https://registry.yarnpkg.com/fast-loops/-/fast-loops-1.1.3.tgz#ce96adb86d07e7bf9b4822ab9c6fac9964981f75" - integrity sha512-8EZzEP0eKkEEVX+drtd9mtuQ+/QrlfW/5MlwcwK5Nds6EkZ/tRzEexkzUY2mIssnAyVLT+TKHuRXmFNNXYUd6g== + version "1.1.4" + resolved "https://registry.yarnpkg.com/fast-loops/-/fast-loops-1.1.4.tgz#61bc77d518c0af5073a638c6d9d5c7683f069ce2" + integrity sha512-8dbd3XWoKCTms18ize6JmQF1SFnnfj5s0B7rRry22EofgMu7B6LKHVh+XfFqFGsqnbH54xgeO83PzpKI+ODhlg== fast-querystring@^1.1.1: version "1.1.2" @@ -9504,9 +9504,9 @@ rimraf@~2.6.2: glob "^7.1.3" rollup@^3.27.1: - version "3.29.4" - resolved "https://registry.yarnpkg.com/rollup/-/rollup-3.29.4.tgz#4d70c0f9834146df8705bfb69a9a19c9e1109981" - integrity sha512-oWzmBZwvYrU0iJHtDmhsm662rC15FRXmcjCk1xD771dFDx5jJ02ufAQQTn0etB2emNk4J9EZg/yWKpsn9BWGRw== + version "3.29.5" + resolved "https://registry.yarnpkg.com/rollup/-/rollup-3.29.5.tgz#8a2e477a758b520fb78daf04bca4c522c1da8a54" + 
integrity sha512-GVsDdsbJzzy4S/v3dqWPJ7EfvZJfCHiDqe80IyrF59LYuP+e6U1LJoUqeuqRbwAWoMNoXivMNeNAOf5E22VA1w== optionalDependencies: fsevents "~2.3.2" diff --git a/docker/build.gradle b/docker/build.gradle index 20608bd8578270..c09bf16d1d7242 100644 --- a/docker/build.gradle +++ b/docker/build.gradle @@ -108,6 +108,8 @@ dockerCompose { environment.put "ACTIONS_EXTRA_PACKAGES", 'acryl-datahub-actions[executor] acryl-datahub-actions' environment.put "ACTIONS_CONFIG", 'https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml' environment.put 'DATAHUB_TELEMETRY_ENABLED', 'false' // disabled when built locally + // disabled for spark-lineage smoke-test + environment.put 'DATAHUB_LOCAL_COMMON_ENV', "${rootProject.project(':metadata-integration:java:spark-lineage-legacy').projectDir}/spark-smoke-test/smoke-gms.env" useComposeFiles = ['profiles/docker-compose.yml'] projectName = 'datahub' diff --git a/docs-website/adoptionStoriesIndexes.json b/docs-website/adoptionStoriesIndexes.json index 79ddf81e4ed6fd..f7b78e6cab8b2a 100644 --- a/docs-website/adoptionStoriesIndexes.json +++ b/docs-website/adoptionStoriesIndexes.json @@ -93,6 +93,42 @@ "category": "B2B & B2C", "description": "“We looked around for data catalog tool, and DataHub was a clear winner.”

Zynga levels up data management using DataHub, highlighting its role in enhancing data management, tracing data lineage, and ensuring data quality." }, + { + "name": "Miro", + "slug": "miro", + "imageUrl": "/img/logos/companies/miro.png", + "imageSize": "medium", + "link": "https://miro.com/careers/life-at-miro/tech/data-products-reliability-the-power-of-metadata/", + "linkType": "blog", + "tagline": "Data Products Reliability: The Power of Metadata", + "category": "B2B & B2C", + "platform": "cloud", + "description": "\"Leveraging our Datahub catalog, we have centralized metadata access for all data products. This integration eliminates the dependency on Airflow metadata alone for defining contracts, enabling flexible definitions for both building blocks and business metrics.\"" + }, + { + "name": "Foursquare", + "slug": "foursquare", + "imageUrl": "/img/logos/companies/foursquare.png", + "imageSize": "medium", + "link": "https://location.foursquare.com/resources/blog/leadership/foursquare-data-platform-from-fragmentation-to-control-plane/", + "linkType": "blog", + "tagline": "Foursquare Data Platform: From Fragmentation to Control (Plane)", + "category": "B2B & B2C", + "platform": "cloud", + "description": "\"After evaluating several options (...) we chose DataHub as the control plane for our data platform and partnered with Acryl Data, which offers DataHub Cloud, a premium hosted version of DataHub. (...) 
Another important thing that worked in DataHub’s favor is the rich and flexible taxonomy it offered for modeling the various aspects of a data platform.\"" + }, + { + "name": "Deutsche Telekom", + "slug": "deutsche-telekom", + "imageUrl": "/img/logos/companies/deutsche-telekom.png", + "imageSize": "medium", + "link": "https://karanjindal95.medium.com/from-chaos-to-clarity-how-datahub-transformed-our-data-utilization-5b5151efd34a", + "linkType": "blog", + "tagline": "From Chaos to Clarity: How DataHub Transformed our Data Utilization", + "category": "B2B & B2C", + "platform": "cloud", + "description": "\"The DataHub data catalog significantly supported our AI/ML team’s efforts by offering seamless access to detailed column descriptions and table schemas through its APIs. This comprehensive data accessibility enabled the team to efficiently develop a text-to-SQL tool, which translates natural language queries into SQL commands.\"" + }, { "name": "Chime", "slug": "chime", diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index fe1ee9e6236ab7..12d279b6e1e815 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -635,6 +635,7 @@ module.exports = { "docs/advanced/browse-paths-upgrade", "docs/browseV2/browse-paths-v2", "docs/plugins", + "docs/advanced/bootstrap-mcps", ], }, { diff --git a/docs-website/src/pages/cloud/DemoForm/index.jsx b/docs-website/src/pages/cloud/DemoForm/index.jsx index 28777e722e962d..fcaa1e129d9bed 100644 --- a/docs-website/src/pages/cloud/DemoForm/index.jsx +++ b/docs-website/src/pages/cloud/DemoForm/index.jsx @@ -80,9 +80,9 @@ const DemoForm = ({ formId }) => {
-
Book a free Demo
+
Book a Demo
- Schedule a personalized demo and get a free a trial. + Schedule your personalized demo and get a free trial.
{/* Use unique ID */} diff --git a/docs-website/static/img/adoption-stories/adoption-stories-deutsche-telekom.png b/docs-website/static/img/adoption-stories/adoption-stories-deutsche-telekom.png new file mode 100644 index 00000000000000..015e2085237200 Binary files /dev/null and b/docs-website/static/img/adoption-stories/adoption-stories-deutsche-telekom.png differ diff --git a/docs-website/static/img/adoption-stories/adoption-stories-foursquare.png b/docs-website/static/img/adoption-stories/adoption-stories-foursquare.png new file mode 100644 index 00000000000000..7140db1be24943 Binary files /dev/null and b/docs-website/static/img/adoption-stories/adoption-stories-foursquare.png differ diff --git a/docs-website/static/img/adoption-stories/adoption-stories-miro.png b/docs-website/static/img/adoption-stories/adoption-stories-miro.png new file mode 100644 index 00000000000000..1ee3ee65729252 Binary files /dev/null and b/docs-website/static/img/adoption-stories/adoption-stories-miro.png differ diff --git a/docs-website/static/img/logos/companies/deutsche-telekom.png b/docs-website/static/img/logos/companies/deutsche-telekom.png new file mode 100644 index 00000000000000..d934e38d88791b Binary files /dev/null and b/docs-website/static/img/logos/companies/deutsche-telekom.png differ diff --git a/docs-website/static/img/logos/companies/foursquare.png b/docs-website/static/img/logos/companies/foursquare.png new file mode 100644 index 00000000000000..896f33c2beaf55 Binary files /dev/null and b/docs-website/static/img/logos/companies/foursquare.png differ diff --git a/docs-website/yarn.lock b/docs-website/yarn.lock index ee300b5cb7672e..039dac5d5556b2 100644 --- a/docs-website/yarn.lock +++ b/docs-website/yarn.lock @@ -4085,10 +4085,10 @@ bl@^4.0.3: inherits "^2.0.4" readable-stream "^3.4.0" -body-parser@1.20.2: - version "1.20.2" - resolved "https://registry.yarnpkg.com/body-parser/-/body-parser-1.20.2.tgz#6feb0e21c4724d06de7ff38da36dad4f57a747fd" - integrity 
sha512-ml9pReCu3M61kGlqoTm2umSXTlRTuGTx0bfYj+uIUKKYycG5NtSbeetV3faSU6R7ajOPw0g/J1PvK4qNy7s5bA== +body-parser@1.20.3: + version "1.20.3" + resolved "https://registry.yarnpkg.com/body-parser/-/body-parser-1.20.3.tgz#1953431221c6fb5cd63c4b36d53fab0928e548c6" + integrity sha512-7rAxByjUMqQ3/bHJy7D6OGXvx/MMc4IqBn/X0fcM1QUcAItpZrBEYhWGem+tzXH90c+G01ypMcYJBO9Y30203g== dependencies: bytes "3.1.2" content-type "~1.0.5" @@ -4098,7 +4098,7 @@ body-parser@1.20.2: http-errors "2.0.0" iconv-lite "0.4.24" on-finished "2.4.1" - qs "6.11.0" + qs "6.13.0" raw-body "2.5.2" type-is "~1.6.18" unpipe "1.0.0" @@ -5307,6 +5307,11 @@ encodeurl@~1.0.2: resolved "https://registry.yarnpkg.com/encodeurl/-/encodeurl-1.0.2.tgz#ad3ff4c86ec2d029322f5a02c3a9a606c95b3f59" integrity sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w== +encodeurl@~2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/encodeurl/-/encodeurl-2.0.0.tgz#7b8ea898077d7e409d3ac45474ea38eaf0857a58" + integrity sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg== + end-of-stream@^1.1.0, end-of-stream@^1.4.1: version "1.4.4" resolved "https://registry.yarnpkg.com/end-of-stream/-/end-of-stream-1.4.4.tgz#5ae64a5f45057baf3626ec14da0ca5e4b2431eb0" @@ -5501,36 +5506,36 @@ expand-template@^2.0.3: integrity sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg== express@^4.17.3: - version "4.19.2" - resolved "https://registry.yarnpkg.com/express/-/express-4.19.2.tgz#e25437827a3aa7f2a827bc8171bbbb664a356465" - integrity sha512-5T6nhjsT+EOMzuck8JjBHARTHfMht0POzlA60WV2pMD3gyXw2LZnZ+ueGdNxG+0calOJcWKbpFcuzLZ91YWq9Q== + version "4.20.0" + resolved "https://registry.yarnpkg.com/express/-/express-4.20.0.tgz#f1d08e591fcec770c07be4767af8eb9bcfd67c48" + integrity sha512-pLdae7I6QqShF5PnNTCVn4hI91Dx0Grkn2+IAsMTgMIKuQVte2dN9PeGSSAME2FR8anOhVA62QDIUaWVfEXVLw== dependencies: accepts "~1.3.8" array-flatten "1.1.1" 
- body-parser "1.20.2" + body-parser "1.20.3" content-disposition "0.5.4" content-type "~1.0.4" cookie "0.6.0" cookie-signature "1.0.6" debug "2.6.9" depd "2.0.0" - encodeurl "~1.0.2" + encodeurl "~2.0.0" escape-html "~1.0.3" etag "~1.8.1" finalhandler "1.2.0" fresh "0.5.2" http-errors "2.0.0" - merge-descriptors "1.0.1" + merge-descriptors "1.0.3" methods "~1.1.2" on-finished "2.4.1" parseurl "~1.3.3" - path-to-regexp "0.1.7" + path-to-regexp "0.1.10" proxy-addr "~2.0.7" qs "6.11.0" range-parser "~1.2.1" safe-buffer "5.2.1" - send "0.18.0" - serve-static "1.15.0" + send "0.19.0" + serve-static "1.16.0" setprototypeof "1.2.0" statuses "2.0.1" type-is "~1.6.18" @@ -7565,10 +7570,10 @@ memfs@^3.1.2, memfs@^3.4.3: dependencies: fs-monkey "^1.0.4" -merge-descriptors@1.0.1: - version "1.0.1" - resolved "https://registry.yarnpkg.com/merge-descriptors/-/merge-descriptors-1.0.1.tgz#b00aaa556dd8b44568150ec9d1b953f3f90cbb61" - integrity sha512-cCi6g3/Zr1iqQi6ySbseM1Xvooa98N0w31jzUYrXPX2xqObmFGHJ0tQ5u74H3mVh7wLouTseZyYIq39g8cNp1w== +merge-descriptors@1.0.3: + version "1.0.3" + resolved "https://registry.yarnpkg.com/merge-descriptors/-/merge-descriptors-1.0.3.tgz#d80319a65f3c7935351e5cfdac8f9318504dbed5" + integrity sha512-gaNvAS7TZ897/rVaZ0nMtAyxNyi/pdbjbAwUpFQpN70GqnVfOiXpeUUMKRBmzXaSQ8DdTX4/0ms62r2K+hE6mQ== merge-stream@^2.0.0: version "2.0.0" @@ -8684,10 +8689,10 @@ path-parse@^1.0.7: resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.7.tgz#fbc114b60ca42b30d9daf5858e4bd68bbedb6735" integrity sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw== -path-to-regexp@0.1.7: - version "0.1.7" - resolved "https://registry.yarnpkg.com/path-to-regexp/-/path-to-regexp-0.1.7.tgz#df604178005f522f15eb4490e7247a1bfaa67f8c" - integrity sha512-5DFkuoqlv1uYQKxy8omFBeJPQcdoE07Kv2sferDCrAq1ohOU+MSDswDIbnx3YAM60qIOnYa53wBhXW0EbMonrQ== +path-to-regexp@0.1.10: + version "0.1.10" + resolved 
"https://registry.yarnpkg.com/path-to-regexp/-/path-to-regexp-0.1.10.tgz#67e9108c5c0551b9e5326064387de4763c4d5f8b" + integrity sha512-7lf7qcQidTku0Gu3YDPc8DJ1q7OOucfa/BSsIwjuh56VU7katFvuM8hULfkwB3Fns/rsVF7PwPKVw1sl5KQS9w== path-to-regexp@2.2.1: version "2.2.1" @@ -9186,6 +9191,13 @@ qs@6.11.0: dependencies: side-channel "^1.0.4" +qs@6.13.0: + version "6.13.0" + resolved "https://registry.yarnpkg.com/qs/-/qs-6.13.0.tgz#6ca3bd58439f7e245655798997787b0d88a51906" + integrity sha512-+38qI9SOr8tfZ4QmJNplMUxqjbe7LKvvZgWdExBOmd+egZTtjLB67Gu0HRX3u/XOq7UU2Nx6nsjvS16Z9uwfpg== + dependencies: + side-channel "^1.0.6" + queue-microtask@^1.2.2: version "1.2.3" resolved "https://registry.yarnpkg.com/queue-microtask/-/queue-microtask-1.2.3.tgz#4929228bbc724dfac43e0efb058caf7b6cfb6243" @@ -10345,6 +10357,25 @@ send@0.18.0: range-parser "~1.2.1" statuses "2.0.1" +send@0.19.0: + version "0.19.0" + resolved "https://registry.yarnpkg.com/send/-/send-0.19.0.tgz#bbc5a388c8ea6c048967049dbeac0e4a3f09d7f8" + integrity sha512-dW41u5VfLXu8SJh5bwRmyYUbAoSB3c9uQh6L8h/KtsFREPWpbX1lrljJo186Jc4nmci/sGUZ9a0a0J2zgfq2hw== + dependencies: + debug "2.6.9" + depd "2.0.0" + destroy "1.2.0" + encodeurl "~1.0.2" + escape-html "~1.0.3" + etag "~1.8.1" + fresh "0.5.2" + http-errors "2.0.0" + mime "1.6.0" + ms "2.1.3" + on-finished "2.4.1" + range-parser "~1.2.1" + statuses "2.0.1" + serialize-javascript@^6.0.0, serialize-javascript@^6.0.1: version "6.0.2" resolved "https://registry.yarnpkg.com/serialize-javascript/-/serialize-javascript-6.0.2.tgz#defa1e055c83bf6d59ea805d8da862254eb6a6c2" @@ -10379,10 +10410,10 @@ serve-index@^1.9.1: mime-types "~2.1.17" parseurl "~1.3.2" -serve-static@1.15.0: - version "1.15.0" - resolved "https://registry.yarnpkg.com/serve-static/-/serve-static-1.15.0.tgz#faaef08cffe0a1a62f60cad0c4e513cff0ac9540" - integrity sha512-XGuRDNjXUijsUL0vl6nSD7cwURuzEgglbOaFuZM9g3kwDXOWVTck0jLzjPzGD+TazWbboZYu52/9/XPdUgne9g== +serve-static@1.16.0: + version "1.16.0" + resolved 
"https://registry.yarnpkg.com/serve-static/-/serve-static-1.16.0.tgz#2bf4ed49f8af311b519c46f272bf6ac3baf38a92" + integrity sha512-pDLK8zwl2eKaYrs8mrPZBJua4hMplRWJ1tIFksVC3FtBEBnl8dxgeHtsaMS8DhS9i4fLObaon6ABoc4/hQGdPA== dependencies: encodeurl "~1.0.2" escape-html "~1.0.3" @@ -10468,7 +10499,7 @@ shelljs@^0.8.5: interpret "^1.0.0" rechoir "^0.6.2" -side-channel@^1.0.4: +side-channel@^1.0.4, side-channel@^1.0.6: version "1.0.6" resolved "https://registry.yarnpkg.com/side-channel/-/side-channel-1.0.6.tgz#abd25fb7cd24baf45466406b1096b7831c9215f2" integrity sha512-fDW/EZ6Q9RiO8eFG8Hj+7u/oW+XrPTIChwCOM2+th2A6OblDtYYIpve9m+KvI9Z4C9qSEXlaGR6bTEYHReuglA== diff --git a/docs/advanced/bootstrap-mcps.md b/docs/advanced/bootstrap-mcps.md new file mode 100644 index 00000000000000..0aa4b7608740f8 --- /dev/null +++ b/docs/advanced/bootstrap-mcps.md @@ -0,0 +1,157 @@ +# Bootstrap MetadataChangeProposals (MCPs) + +Bootstrap MCPs are templated MCPs which are loaded when the `system-update` job runs. This allows adding +entities and aspects to DataHub at install time with the ability to customize them via environment variable +overrides. + +The built-in bootstrap MCP process can also be extended with custom MCPs. This can streamline deployment +scenarios where a set of standard ingestion recipes, data platforms, users groups, or other configuration +can be applied without the need for developing custom scripts. + +## Process Overview + +When DataHub is installed or upgraded, a job runs called `system-update`, this job is responsible for data +migration (particularly Elasticsearch indices) and ensuring the data is prepared for the next version of +DataHub. This is the job which will also apply the bootstrap MCPs. + +The `system-update` job, depending on configuration, can be split into two sequences of steps. If they are +not split, then all steps are blocking. + +1. An initial blocking sequence which is run prior to the new version of GMS and other components +2. 
Second sequence of steps where GMS and other components are allowed to run while additional data migration steps are +continued in the background + +When applying bootstrap MCPs `system-update` will perform the following steps: + +1. The `bootstrap_mcps.yaml` file is read, either from a default classpath location, `bootstrap_mcps.yaml`, or a filesystem location + provided by an environment variable, `SYSTEM_UPDATE_BOOTSTRAP_MCP_CONFIG`. +2. Depending on the mode of blocking or non-blocking each entry in the configuration file will be executed in sequence. +3. The template MCP file is loaded either from the classpath, or a filesystem location, and the template values are applied. +4. The rendered template MCPs are executed with the options specified in the `bootstrap_mcps.yaml`. + +## `bootstrap_mcps.yaml` Configuration + +The `bootstrap_mcps.yaml` file has the following format. + +```yaml +bootstrap: + templates: + - name: + version: + force: false + blocking: false + async: true + optional: false + mcps_location: + values_env: +``` + +Each entry in the list of templates points to a single yaml file which can contain one or more MCP objects. The +execution of the template MCPs is tracked by name and version to prevent re-execution. The MCP objects are executed once +unless `force=true` for each `name`/`version` combination. + +See the following table of options for descriptions of each field in the template configuration. + +| Field | Default | Required | Description | +|---------------|----------|-----------|------------------------------------------------------------------------------------------------------------| +| name | | `true` | The name for the collection of template MCPs. | +| version | | `true` | A string version for the collection of template MCPs. | +| force | `false` | `false` | Ignores the previous run history, will not skip execution if run previously. 
| +| blocking | `false` | `false` | Run before GMS and other components during upgrade/install if running in split blocking/non-blocking mode. | +| async | `true` | `false` | Controls whether the MCPs are executed for sync or async ingestion. | +| optional | `false` | `false` | Whether to ignore a failure or fail the entire `system-update` job. | +| mcps_location | | `true` | The location of the file which contains the template MCPs | +| values_env | | `false` | The environment variable which contains override template values. | + +## Template MCPs + +Template MCPs are stored in a yaml file which uses the mustache templating library to populate values from an optional environment +variable. Defaults can be provided inline making override only necessary when providing install/upgrade time configuration. + +In general the file contains a list of MCPs which follow the schema definition for MCPs exactly. Any valid field for an MCP +is accepted, including optional fields such as `headers`. + + +### Example: Native Group + +An example template MCP collection, configuration, and values environment variable is shown below which would create a native group. 
+ +```yaml +- entityUrn: urn:li:corpGroup:{{group.id}} + entityType: corpGroup + aspectName: corpGroupInfo + changeType: UPSERT + aspect: + description: {{group.description}}{{^group.description}}Default description{{/group.description}} + displayName: {{group.displayName}} + created: {{&auditStamp}} + members: [] # required as part of the aspect's schema definition + groups: [] # required as part of the aspect's schema definition + admins: [] # required as part of the aspect's schema definition +- entityUrn: urn:li:corpGroup:{{group.id}} + entityType: corpGroup + aspectName: origin + changeType: UPSERT + aspect: + type: NATIVE +``` + +Creating an entry in the `bootstrap_mcps.yaml` to populate the values from the environment variable `DATAHUB_TEST_GROUP_VALUES` + +```yaml + - name: test-group + version: v1 + mcps_location: "bootstrap_mcps/test-group.yaml" + values_env: "DATAHUB_TEST_GROUP_VALUES" +``` + +An example json values are loaded from environment variable in `DATAHUB_TEST_GROUP_VALUES` might look like the following. + +```json +{"group":{"id":"mygroup", "displayName":"My Group", "description":"Description of the group"}} +``` + +Using standard mustache template semantics the values in the environment would be inserted into the yaml structure +and ingested when the `system-update` runs. + +#### Default values + +In the example above, the group's `description` if not provided would default to `Default description` if not specified +in the values contain in the environment variable override following the standard mustache template semantics. + +#### AuditStamp + +A special template reference, `{{&auditStamp}}` can be used to inject an `auditStamp` into the aspect. This can be used to +populate required fields of type `auditStamp` calculated from when the MCP is applied. This will insert an inline json representation +of the `auditStamp` into the location and avoid escaping html characters per standard mustache template indicated by the `&` character. 
+ +### Ingestion Template MCPs + +Ingestion template MCPs are slightly more complicated since the ingestion `recipe` is stored as a json string within the aspect. +For ingestion recipes, special handling was added so that they can be described naturally in yaml instead of the normally encoded json string. + +This means that in the example below, the structure beneath the `aspect.config.recipe` path will be automatically converted +to the required json structure and stored as a string. + +```yaml +- entityType: dataHubIngestionSource + entityUrn: urn:li:dataHubIngestionSource:demo-data + aspectName: dataHubIngestionSourceInfo + changeType: UPSERT + aspect: + type: 'demo-data' + name: 'demo-data' + config: + recipe: + source: + type: 'datahub-gc' + config: {} + executorId: default +``` + +## Known Limitations + +* Supported change types: + * UPSERT + * CREATE + * CREATE_ENTITY diff --git a/docs/api/restli/restli-overview.md b/docs/api/restli/restli-overview.md index d8a81075263747..22b913d9a25df4 100644 --- a/docs/api/restli/restli-overview.md +++ b/docs/api/restli/restli-overview.md @@ -1203,6 +1203,7 @@ where valid conditions include - CONTAIN - END_WITH - EQUAL + - IEQUAL (Supports case insensitive equals) - GREATER_THAN - GREATER_THAN_OR_EQUAL_TO - LESS_THAN diff --git a/docs/how/add-custom-data-platform.md b/docs/how/add-custom-data-platform.md index 5dcd423e775698..3ffb61c39e5bf5 100644 --- a/docs/how/add-custom-data-platform.md +++ b/docs/how/add-custom-data-platform.md @@ -12,7 +12,7 @@ your custom Data Platform will persist even between full cleans (nukes) of DataH ## Changing Default Data Platforms -Simply make a change to the [data_platforms.json](https://github.com/datahub-project/datahub/blob/master/metadata-service/war/src/main/resources/boot/data_platforms.json) +Simply make a change to the [data_platforms.yaml](https://github.com/datahub-project/datahub/blob/master/metadata-service/configuration/src/main/resources/bootstrap_mcps/data-platforms.yaml) 
file to add a custom Data Platform: ``` diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index f878a864e20fab..89ea8ce8c543ab 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -23,6 +23,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe - #11486 - Deprecated Criterion filters using `value`. Use `values` instead. This also deprecates the ability to use comma delimited string to represent multiple values using `value`. - #11484 - Metadata service authentication enabled by default - #11484 - Rest API authorization enabled by default +- #10472 - `SANDBOX` added as a FabricType. No rollbacks allowed once metadata with this fabric type is added without manual cleanups in databases. ### Potential Downtime diff --git a/docs/what-is-datahub/datahub-concepts.md b/docs/what-is-datahub/datahub-concepts.md index 03b86fab0ede41..8741d445f10f7d 100644 --- a/docs/what-is-datahub/datahub-concepts.md +++ b/docs/what-is-datahub/datahub-concepts.md @@ -99,7 +99,7 @@ List of Data Platforms - Tableau - Vertica -Reference : [data_platforms.json](https://github.com/datahub-project/datahub/blob/master/metadata-service/war/src/main/resources/boot/data_platforms.json) +Reference : [data_platforms.yaml](https://github.com/datahub-project/datahub/blob/master/metadata-service/configuration/src/main/resources/bootstrap_mcps/data-platforms.yaml) diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/dataproduct/DataProductPropertiesTemplate.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/dataproduct/DataProductPropertiesTemplate.java index 9b117114395b12..f1b83732f7a9f0 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/dataproduct/DataProductPropertiesTemplate.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/dataproduct/DataProductPropertiesTemplate.java @@ -10,8 +10,8 @@ 
public class DataProductPropertiesTemplate implements ArrayMergingTemplate { - private static final String ASSETS_FIELD_NAME = "assets"; - private static final String KEY_FIELD_NAME = "destinationUrn"; + public static final String ASSETS_FIELD_NAME = "assets"; + public static final String KEY_FIELD_NAME = "destinationUrn"; @Override public DataProductProperties getSubtype(RecordTemplate recordTemplate) throws ClassCastException { diff --git a/li-utils/src/main/pegasus/com/linkedin/common/FabricType.pdl b/li-utils/src/main/pegasus/com/linkedin/common/FabricType.pdl index 366843e460cb35..fd7bdfc88614ff 100644 --- a/li-utils/src/main/pegasus/com/linkedin/common/FabricType.pdl +++ b/li-utils/src/main/pegasus/com/linkedin/common/FabricType.pdl @@ -59,4 +59,9 @@ enum FabricType { * Designates review fabrics */ RVW + + /** + * Designates sandbox fabrics + */ + SANDBOX } diff --git a/metadata-ingestion/adding-source.md b/metadata-ingestion/adding-source.md index 6baddf6b2010dc..541f5437b9da85 100644 --- a/metadata-ingestion/adding-source.md +++ b/metadata-ingestion/adding-source.md @@ -240,7 +240,7 @@ in [sql_common.py](./src/datahub/ingestion/source/sql/sql_common.py) if the sour ### 9. Add logo for the platform -Add the logo image in [images folder](../datahub-web-react/src/images) and add it to be ingested at [startup](../metadata-service/war/src/main/resources/boot/data_platforms.json) +Add the logo image in [images folder](../datahub-web-react/src/images) and add it to be ingested at [startup](../metadata-service/configuration/src/main/resources/bootstrap_mcps/data-platforms.yaml) ### 10. 
Update Frontend for UI-based ingestion diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py index 1f54767de5a68b..3cbb13375229b9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py @@ -21,6 +21,7 @@ ) from looker_sdk.error import SDKError +from looker_sdk.rtl.serialize import DeserializeError from looker_sdk.sdk.api40.models import ( LookmlModelExplore, LookmlModelExploreField, @@ -1131,7 +1132,16 @@ def from_api( # noqa: C901 logger.warning( f"Failed to extract explore {explore_name} from model {model}: {e}" ) - + except DeserializeError as e: + reporter.warning( + title="Failed to fetch explore from the Looker API", + message=( + "An error occurred while extracting the explore from the model. " + "Please check the explore and model configurations." + ), + context=f"Explore: {explore_name}, Model: {model}", + exc=e, + ) except AssertionError: reporter.report_warning( title="Unable to find Views", diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index e593e132dafd7e..f269ccf1cd98f8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -16,6 +16,7 @@ ) from looker_sdk.error import SDKError +from looker_sdk.rtl.serialize import DeserializeError from looker_sdk.sdk.api40.models import ( Dashboard, DashboardElement, @@ -1288,12 +1289,13 @@ def process_dashboard( dashboard_id=dashboard_id, fields=fields, ) - except SDKError: + except (SDKError, DeserializeError) as e: # A looker dashboard could be deleted in between the list and the get self.reporter.report_warning( - title="Error Loading Dashboard", + title="Failed to fetch dashboard from 
the Looker API", message="Error occurred while attempting to loading dashboard from Looker API. Skipping.", context=f"Dashboard ID: {dashboard_id}", + exc=e, ) return [], None, dashboard_id, start_time, datetime.datetime.now() diff --git a/metadata-ingestion/src/datahub/ingestion/source/metabase.py b/metadata-ingestion/src/datahub/ingestion/source/metabase.py index 49fa9dab5f1d8d..828bbd213a796f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/metabase.py +++ b/metadata-ingestion/src/datahub/ingestion/source/metabase.py @@ -725,7 +725,7 @@ def get_datasource_from_id( return "", None, None, None # Map engine names to what datahub expects in - # https://github.com/datahub-project/datahub/blob/master/metadata-service/war/src/main/resources/boot/data_platforms.json + # https://github.com/datahub-project/datahub/blob/master/metadata-service/configuration/src/main/resources/bootstrap_mcps/data-platforms.yaml engine = dataset_json.get("engine", "") engine_mapping = { diff --git a/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py b/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py index d3c4e2e3cd80e8..79ec47a7efb2c2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py +++ b/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py @@ -40,6 +40,7 @@ class Owners(ConfigModel): type: str = models.OwnershipTypeClass.DEVELOPER + typeUrn: Optional[str] = None users: Optional[List[str]] = None groups: Optional[List[str]] = None @@ -154,6 +155,8 @@ def make_glossary_term_urn( def get_owners(owners: Owners) -> models.OwnershipClass: ownership_type, ownership_type_urn = validate_ownership_type(owners.type) + if owners.typeUrn is not None: + ownership_type_urn = owners.typeUrn owners_meta: List[models.OwnerClass] = [] if owners.users is not None: owners_meta = owners_meta + [ diff --git a/metadata-ingestion/src/datahub/ingestion/source/mode.py 
b/metadata-ingestion/src/datahub/ingestion/source/mode.py index 56b8ce00a4d1f2..e24cba9b193d31 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mode.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mode.py @@ -686,7 +686,7 @@ def construct_chart_custom_properties( def _get_datahub_friendly_platform(self, adapter, platform): # Map adaptor names to what datahub expects in - # https://github.com/datahub-project/datahub/blob/master/metadata-service/war/src/main/resources/boot/data_platforms.json + # https://github.com/datahub-project/datahub/blob/master/metadata-service/configuration/src/main/resources/bootstrap_mcps/data-platforms.yaml platform_mapping = { "jdbc:athena": "athena", diff --git a/metadata-ingestion/src/datahub/ingestion/source/nifi.py b/metadata-ingestion/src/datahub/ingestion/source/nifi.py index 25781cd2f1dcc9..7072ebf6473df1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/nifi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/nifi.py @@ -332,10 +332,14 @@ def __init__(self) -> None: } def process_s3_provenance_event(self, event): + logger.debug(f"Processing s3 provenance event: {event}") attributes = event.get("attributes", []) s3_bucket = get_attribute_value(attributes, "s3.bucket") s3_key = get_attribute_value(attributes, "s3.key") if not s3_key: + logger.debug( + "s3.key not present in the list of attributes, trying to use filename attribute instead" + ) s3_key = get_attribute_value(attributes, "filename") s3_url = f"s3://{s3_bucket}/{s3_key}" @@ -344,6 +348,7 @@ def process_s3_provenance_event(self, event): dataset_name = s3_path.replace("/", ".") platform = "s3" dataset_urn = builder.make_dataset_urn(platform, s3_path, self.env) + logger.debug(f"Reasoned s3 dataset urn: {dataset_urn}") return ExternalDataset( platform, dataset_name, @@ -910,6 +915,11 @@ def construct_workunits(self) -> Iterable[MetadataWorkUnit]: # noqa: C901 ) for component in self.nifi_flow.components.values(): + logger.debug( + f"Beginng 
construction of workunits for component {component.id} of type {component.type} and name {component.name}" + ) + logger.debug(f"Inlets of the component: {component.inlets.keys()}") + logger.debug(f"Outlets of the component: {component.outlets.keys()}") job_name = component.name job_urn = builder.make_data_job_urn_with_flow(flow_urn, component.id) @@ -937,6 +947,9 @@ def construct_workunits(self) -> Iterable[MetadataWorkUnit]: # noqa: C901 jobProperties["last_event_time"] = component.last_event_time for dataset in component.inlets.values(): + logger.debug( + f"Yielding dataset workunits for {dataset.dataset_urn} (inlet)" + ) yield from self.construct_dataset_workunits( dataset.platform, dataset.dataset_name, @@ -945,6 +958,9 @@ def construct_workunits(self) -> Iterable[MetadataWorkUnit]: # noqa: C901 ) for dataset in component.outlets.values(): + logger.debug( + f"Yielding dataset workunits for {dataset.dataset_urn} (outlet)" + ) yield from self.construct_dataset_workunits( dataset.platform, dataset.dataset_name, @@ -1207,6 +1223,7 @@ def construct_job_workunits( inputJobs: List[str] = [], status: Optional[str] = None, ) -> Iterable[MetadataWorkUnit]: + logger.debug(f"Begining construction of job workunit for {job_urn}") if job_properties: job_properties = {k: v for k, v in job_properties.items() if v is not None} @@ -1229,8 +1246,12 @@ def construct_job_workunits( inlets.sort() outlets.sort() inputJobs.sort() + logger.debug(f"Inlets after sorting: {inlets}") + logger.debug(f"Outlets after sorting: {outlets}") + logger.debug(f"Input jobs after sorting: {inputJobs}") if self.config.incremental_lineage: + logger.debug("Preparing mcps for incremental lineage") patch_builder: DataJobPatchBuilder = DataJobPatchBuilder(job_urn) for inlet in inlets: patch_builder.add_input_dataset(inlet) @@ -1239,6 +1260,7 @@ def construct_job_workunits( for inJob in inputJobs: patch_builder.add_input_datajob(inJob) for patch_mcp in patch_builder.build(): + logger.debug(f"Preparing Patch 
MCP: {patch_mcp}") yield MetadataWorkUnit( id=f"{job_urn}-{patch_mcp.aspectName}", mcp_raw=patch_mcp ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma_api.py b/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma_api.py index 66d4678e521328..3e88f43142ede6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma_api.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma_api.py @@ -141,12 +141,19 @@ def _get_users(self) -> Dict[str, str]: logger.debug("Fetching all accessible users metadata.") try: users: Dict[str, str] = {} - response = self._get_api_call(f"{self.config.api_url}/members") - response.raise_for_status() - for user_dict in response.json(): - users[ - user_dict[Constant.MEMBERID] - ] = f"{user_dict[Constant.FIRSTNAME]}_{user_dict[Constant.LASTNAME]}" + members_url = url = f"{self.config.api_url}/members?limit=50" + while True: + response = self._get_api_call(url) + response.raise_for_status() + response_dict = response.json() + for user_dict in response_dict[Constant.ENTRIES]: + users[ + user_dict[Constant.MEMBERID] + ] = f"{user_dict[Constant.FIRSTNAME]}_{user_dict[Constant.LASTNAME]}" + if response_dict[Constant.NEXTPAGE]: + url = f"{members_url}&page={response_dict[Constant.NEXTPAGE]}" + else: + break return users except Exception as e: self._log_http_error( diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py index 1fbf31a48890d8..8d6746b6433a4e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py @@ -558,7 +558,7 @@ def get_platform(connection_type: str) -> str: # connection_type taken from # https://help.tableau.com/current/api/rest_api/en-us/REST/rest_api_concepts_connectiontype.htm # datahub platform mapping is found here - # 
https://github.com/datahub-project/datahub/blob/master/metadata-service/war/src/main/resources/boot/data_platforms.json + # https://github.com/datahub-project/datahub/blob/master/metadata-service/configuration/src/main/resources/bootstrap_mcps/data-platforms.yaml if connection_type in ("textscan", "textclean", "excel-direct", "excel", "csv"): platform = "external" diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_dataproduct.py b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_dataproduct.py index 4045917eb830e0..ce224bde003fd3 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_dataproduct.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_dataproduct.py @@ -53,9 +53,9 @@ def handle_end_of_stream( data_products: Dict[str, DataProductPatchBuilder] = {} data_products_container: Dict[str, DataProductPatchBuilder] = {} logger.debug("Generating dataproducts") + is_container = self.config.is_container for entity_urn in self.entity_map.keys(): data_product_urn = self.config.get_data_product_to_add(entity_urn) - is_container = self.config.is_container if data_product_urn: if data_product_urn not in data_products: data_products[data_product_urn] = DataProductPatchBuilder( diff --git a/metadata-ingestion/src/datahub/specific/datajob.py b/metadata-ingestion/src/datahub/specific/datajob.py index 2d944edeb36403..8da8edc8ef0f22 100644 --- a/metadata-ingestion/src/datahub/specific/datajob.py +++ b/metadata-ingestion/src/datahub/specific/datajob.py @@ -330,7 +330,7 @@ def add_output_dataset( self._add_patch( DataJobInputOutput.ASPECT_NAME, "add", - path=f"/outputDatasetEdges/{self.quote(str(input))}", + path=f"/outputDatasetEdges/{self.quote(str(output))}", value=output_edge, ) return self diff --git a/metadata-ingestion/tests/integration/sigma/test_sigma.py b/metadata-ingestion/tests/integration/sigma/test_sigma.py index b6e9db99eed39a..6c01bf6dc80fe7 100644 --- 
a/metadata-ingestion/tests/integration/sigma/test_sigma.py +++ b/metadata-ingestion/tests/integration/sigma/test_sigma.py @@ -381,25 +381,29 @@ def register_mock_api(request_mock: Any, override_data: dict = {}) -> None: "https://aws-api.sigmacomputing.com/v2/members": { "method": "GET", "status_code": 200, - "json": [ - { - "organizationId": "b94da709-176c-4242-bea6-6760f34c9228", - "memberId": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", - "memberType": "admin", - "firstName": "Shubham", - "lastName": "Jagtap", - "email": "john.doe@example.com", - "profileImgUrl": None, - "createdBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", - "updatedBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", - "createdAt": "2023-11-28T10:59:20.957Z", - "updatedAt": "2024-03-12T21:21:17.996Z", - "homeFolderId": "9bb94df1-e8af-49eb-9c37-2bd40b0efb2e", - "userKind": "internal", - "isArchived": False, - "isInactive": False, - }, - ], + "json": { + "entries": [ + { + "organizationId": "b94da709-176c-4242-bea6-6760f34c9228", + "memberId": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", + "memberType": "admin", + "firstName": "Shubham", + "lastName": "Jagtap", + "email": "john.doe@example.com", + "profileImgUrl": None, + "createdBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", + "updatedBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", + "createdAt": "2023-11-28T10:59:20.957Z", + "updatedAt": "2024-03-12T21:21:17.996Z", + "homeFolderId": "9bb94df1-e8af-49eb-9c37-2bd40b0efb2e", + "userKind": "internal", + "isArchived": False, + "isInactive": False, + }, + ], + "total": 1, + "nextPage": None, + }, }, } diff --git a/metadata-ingestion/tests/unit/patch/test_patch_builder.py b/metadata-ingestion/tests/unit/patch/test_patch_builder.py index 8c2a4b2c4a6ddd..267da6cdd5d205 100644 --- a/metadata-ingestion/tests/unit/patch/test_patch_builder.py +++ b/metadata-ingestion/tests/unit/patch/test_patch_builder.py @@ -1,10 +1,14 @@ +import json import pathlib import pytest +from freezegun.api import freeze_time from datahub.emitter.mce_builder import ( make_chart_urn, 
make_dashboard_urn, + make_data_flow_urn, + make_data_job_urn_with_flow, make_dataset_urn, make_schema_field_urn, make_tag_urn, @@ -22,6 +26,7 @@ ) from datahub.specific.chart import ChartPatchBuilder from datahub.specific.dashboard import DashboardPatchBuilder +from datahub.specific.datajob import DataJobPatchBuilder from datahub.specific.dataset import DatasetPatchBuilder from tests.test_helpers import mce_helpers @@ -175,3 +180,85 @@ def test_basic_dashboard_patch_builder(): ), ), ] + + +@freeze_time("2020-04-14 07:00:00") +def test_datajob_patch_builder(): + flow_urn = make_data_flow_urn( + orchestrator="nifi", flow_id="252C34e5af19-0192-1000-b248-b1abee565b5d" + ) + job_urn = make_data_job_urn_with_flow( + flow_urn, "5ca6fee7-0192-1000-f206-dfbc2b0d8bfb" + ) + patcher = DataJobPatchBuilder(job_urn) + + patcher.add_output_dataset( + "urn:li:dataset:(urn:li:dataPlatform:s3,output-bucket/folder1,DEV)" + ) + patcher.add_output_dataset( + "urn:li:dataset:(urn:li:dataPlatform:s3,output-bucket/folder3,DEV)" + ) + patcher.add_output_dataset( + "urn:li:dataset:(urn:li:dataPlatform:s3,output-bucket/folder2,DEV)" + ) + + assert patcher.build() == [ + MetadataChangeProposalClass( + entityType="dataJob", + entityUrn="urn:li:dataJob:(urn:li:dataFlow:(nifi,252C34e5af19-0192-1000-b248-b1abee565b5d,prod),5ca6fee7-0192-1000-f206-dfbc2b0d8bfb)", + changeType="PATCH", + aspectName="dataJobInputOutput", + aspect=GenericAspectClass( + value=json.dumps( + [ + { + "op": "add", + "path": "/outputDatasetEdges/urn:li:dataset:(urn:li:dataPlatform:s3,output-bucket~1folder1,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,output-bucket/folder1,DEV)", + "created": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub", + }, + "lastModified": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub", + }, + }, + }, + { + "op": "add", + "path": "/outputDatasetEdges/urn:li:dataset:(urn:li:dataPlatform:s3,output-bucket~1folder3,DEV)", + "value": { + 
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,output-bucket/folder3,DEV)", + "created": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub", + }, + "lastModified": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub", + }, + }, + }, + { + "op": "add", + "path": "/outputDatasetEdges/urn:li:dataset:(urn:li:dataPlatform:s3,output-bucket~1folder2,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,output-bucket/folder2,DEV)", + "created": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub", + }, + "lastModified": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub", + }, + }, + }, + ] + ).encode("utf-8"), + contentType="application/json-patch+json", + ), + ) + ] diff --git a/metadata-integration/java/datahub-client/scripts/check_jar.sh b/metadata-integration/java/datahub-client/scripts/check_jar.sh index f76931428e3d6c..10299ec714d165 100755 --- a/metadata-integration/java/datahub-client/scripts/check_jar.sh +++ b/metadata-integration/java/datahub-client/scripts/check_jar.sh @@ -36,10 +36,11 @@ jar -tvf $jarFile |\ grep -v "darwin" |\ grep -v "MetadataChangeProposal.avsc" |\ grep -v "aix" |\ - grep -v "com/sun/" - grep -v "mozilla" - grep -v "VersionInfo.java" - grep -v "mime.types" + grep -v "com/sun/" |\ + grep -v "mozilla" |\ + grep -v "VersionInfo.java" |\ + grep -v "mime.types" |\ + grep -v "com/ibm/.*" if [ $? 
-ne 0 ]; then diff --git a/metadata-integration/java/spark-lineage-legacy/spark-smoke-test/smoke-gms.env b/metadata-integration/java/spark-lineage-legacy/spark-smoke-test/smoke-gms.env new file mode 100644 index 00000000000000..7b437a98089ce7 --- /dev/null +++ b/metadata-integration/java/spark-lineage-legacy/spark-smoke-test/smoke-gms.env @@ -0,0 +1,2 @@ +REST_API_AUTHORIZATION_ENABLED=false +METADATA_SERVICE_AUTH_ENABLED=false \ No newline at end of file diff --git a/metadata-io/src/main/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffect.java b/metadata-io/src/main/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffect.java new file mode 100644 index 00000000000000..544040d14f8b7c --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffect.java @@ -0,0 +1,134 @@ +package com.linkedin.metadata.dataproducts.sideeffects; + +import static com.linkedin.metadata.Constants.DATA_PRODUCT_ENTITY_NAME; +import static com.linkedin.metadata.Constants.DATA_PRODUCT_PROPERTIES_ASPECT_NAME; +import static com.linkedin.metadata.search.utils.QueryUtils.EMPTY_FILTER; + +import com.google.common.collect.ImmutableList; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.dataproduct.DataProductAssociation; +import com.linkedin.dataproduct.DataProductAssociationArray; +import com.linkedin.dataproduct.DataProductProperties; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.batch.MCLItem; +import com.linkedin.metadata.aspect.batch.MCPItem; +import com.linkedin.metadata.aspect.models.graph.RelatedEntities; +import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult; +import com.linkedin.metadata.aspect.patch.GenericJsonPatch; +import com.linkedin.metadata.aspect.patch.PatchOperationType; +import 
com.linkedin.metadata.aspect.patch.template.dataproduct.DataProductPropertiesTemplate; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.hooks.MCPSideEffect; +import com.linkedin.metadata.entity.ebean.batch.PatchItemImpl; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.query.filter.RelationshipDirection; +import com.linkedin.metadata.search.utils.QueryUtils; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Stream; +import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; +import lombok.extern.slf4j.Slf4j; + +/** + * Side effect that enforces single data product being associated with each entity by removing any + * previous relation when evaluation updates to Data Product Properties aspects. + */ +@Slf4j +@Getter +@Setter +@Accessors(chain = true) +public class DataProductUnsetSideEffect extends MCPSideEffect { + @Nonnull private AspectPluginConfig config; + + @Override + protected Stream applyMCPSideEffect( + Collection changeMCPS, @Nonnull RetrieverContext retrieverContext) { + return Stream.of(); + } + + @Override + protected Stream postMCPSideEffect( + Collection mclItems, @Nonnull RetrieverContext retrieverContext) { + return mclItems.stream().flatMap(item -> generatePatchRemove(item, retrieverContext)); + } + + private static Stream generatePatchRemove( + MCLItem mclItem, @Nonnull RetrieverContext retrieverContext) { + + if (DATA_PRODUCT_PROPERTIES_ASPECT_NAME.equals(mclItem.getAspectName())) { + List mcpItems = new ArrayList<>(); + DataProductProperties dataProductProperties = mclItem.getAspect(DataProductProperties.class); + if (dataProductProperties == null) { + log.error("Unable to process data product properties for urn: {}", mclItem.getUrn()); + return 
Stream.empty(); + } + for (DataProductAssociation dataProductAssociation : + Optional.ofNullable(dataProductProperties.getAssets()) + .orElse(new DataProductAssociationArray())) { + RelatedEntitiesScrollResult result = + retrieverContext + .getGraphRetriever() + .scrollRelatedEntities( + null, + QueryUtils.newFilter( + "urn", dataProductAssociation.getDestinationUrn().toString()), + null, + EMPTY_FILTER, + ImmutableList.of("DataProductContains"), + QueryUtils.newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING), + Collections.emptyList(), + null, + 10, // Should only ever be one, if ever greater than ten will decrease over time + // to become consistent + null, + null); + if (!result.getEntities().isEmpty()) { + for (RelatedEntities entity : result.getEntities()) { + if (!mclItem.getUrn().equals(UrnUtils.getUrn(entity.getSourceUrn()))) { + EntitySpec entitySpec = + retrieverContext + .getAspectRetriever() + .getEntityRegistry() + .getEntitySpec(DATA_PRODUCT_ENTITY_NAME); + GenericJsonPatch.PatchOp patchOp = new GenericJsonPatch.PatchOp(); + patchOp.setOp(PatchOperationType.REMOVE.getValue()); + patchOp.setPath(String.format("/assets/%s", entity.getDestinationUrn())); + mcpItems.add( + PatchItemImpl.builder() + .urn(UrnUtils.getUrn(entity.getSourceUrn())) + .entitySpec( + retrieverContext + .getAspectRetriever() + .getEntityRegistry() + .getEntitySpec(DATA_PRODUCT_ENTITY_NAME)) + .aspectName(DATA_PRODUCT_PROPERTIES_ASPECT_NAME) + .aspectSpec(entitySpec.getAspectSpec(DATA_PRODUCT_PROPERTIES_ASPECT_NAME)) + .patch( + GenericJsonPatch.builder() + .arrayPrimaryKeys( + Map.of( + DataProductPropertiesTemplate.ASSETS_FIELD_NAME, + List.of(DataProductPropertiesTemplate.KEY_FIELD_NAME))) + .patch(List.of(patchOp)) + .build() + .getJsonPatch()) + .auditStamp(mclItem.getAuditStamp()) + .systemMetadata(mclItem.getSystemMetadata()) + .build(retrieverContext.getAspectRetriever().getEntityRegistry())); + } + } + } + } + return mcpItems.stream(); + } + return 
Stream.empty(); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationUtils.java index ddcc6b65992319..6ecac70e13c7e5 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationUtils.java @@ -205,7 +205,8 @@ public static LineageSearchResult validateLineageSearchResult( public static EntityLineageResult validateEntityLineageResult( @Nonnull OperationContext opContext, @Nullable final EntityLineageResult entityLineageResult, - @Nonnull final EntityService entityService) { + @Nonnull final EntityService entityService, + boolean includeGhostEntities) { if (entityLineageResult == null) { return null; } @@ -223,8 +224,8 @@ public static EntityLineageResult validateEntityLineageResult( entityLineageResult.getRelationships(), LineageRelationship::getEntity, entityService, - true, - false) + !includeGhostEntities, + includeGhostEntities) .collect(Collectors.toCollection(LineageRelationshipArray::new)); validatedEntityLineageResult.setFiltered( @@ -280,6 +281,8 @@ private static Stream validateSearchUrns( boolean includeSoftDeleted) { if (enforceSQLExistence) { + // TODO: Always set includeSoftDeleted to true once 0.3.7 OSS merge occurs, as soft deleted + // results will be filtered by graph service Set existingUrns = entityService.exists( opContext, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/SiblingGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/SiblingGraphService.java index f9287ab34cf192..993b5a457206d1 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/SiblingGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/SiblingGraphService.java @@ -38,10 +38,29 @@ public EntityLineageResult getLineage( int offset, int count, int maxHops) { 
- return ValidationUtils.validateEntityLineageResult( + return getLineage(opContext, entityUrn, direction, offset, count, maxHops, false, false); + } + + @Nonnull + public EntityLineageResult getLineage( + @Nonnull OperationContext opContext, + @Nonnull Urn entityUrn, + @Nonnull LineageDirection direction, + int offset, + int count, + int maxHops, + boolean separateSiblings, + boolean includeGhostEntities) { + return getLineage( opContext, - getLineage(opContext, entityUrn, direction, offset, count, maxHops, false, new HashSet<>()), - _entityService); + entityUrn, + direction, + offset, + count, + maxHops, + separateSiblings, + includeGhostEntities, + new HashSet<>()); } /** @@ -60,12 +79,14 @@ public EntityLineageResult getLineage( int count, int maxHops, boolean separateSiblings, + boolean includeGhostEntities, @Nonnull Set visitedUrns) { if (separateSiblings) { return ValidationUtils.validateEntityLineageResult( opContext, _graphService.getLineage(opContext, entityUrn, direction, offset, count, maxHops), - _entityService); + _entityService, + includeGhostEntities); } if (maxHops > 1) { @@ -89,7 +110,7 @@ public EntityLineageResult getLineage( // remove your siblings from your lineage entityLineage = filterLineageResultFromSiblings( - opContext, entityUrn, allSiblingsInGroup, entityLineage, null); + opContext, entityUrn, allSiblingsInGroup, entityLineage, null, includeGhostEntities); // Update offset and count to fetch the correct number of edges from the next sibling node offset = Math.max(0, offset - entityLineage.getTotal()); @@ -109,8 +130,17 @@ public EntityLineageResult getLineage( siblingUrn, allSiblingsInGroup, getLineage( - opContext, siblingUrn, direction, offset, count, maxHops, false, visitedUrns), - entityLineage); + opContext, + siblingUrn, + direction, + offset, + count, + maxHops, + false, + includeGhostEntities, + visitedUrns), + entityLineage, + includeGhostEntities); // Update offset and count to fetch the correct number of edges from the next 
sibling node offset = Math.max(0, offset - nextEntityLineage.getTotal()); @@ -122,7 +152,8 @@ public EntityLineageResult getLineage( ; } - return ValidationUtils.validateEntityLineageResult(opContext, entityLineage, _entityService); + return ValidationUtils.validateEntityLineageResult( + opContext, entityLineage, _entityService, includeGhostEntities); } private int getFiltered(@Nullable EntityLineageResult entityLineageResult) { @@ -138,7 +169,8 @@ private EntityLineageResult filterLineageResultFromSiblings( @Nonnull final Urn urn, @Nonnull final Set allSiblingsInGroup, @Nonnull final EntityLineageResult entityLineageResult, - @Nullable final EntityLineageResult existingResult) { + @Nullable final EntityLineageResult existingResult, + boolean includeGhostEntities) { int numFiltered = 0; // 1) remove the source entities siblings from this entity's downstreams @@ -231,6 +263,6 @@ private EntityLineageResult filterLineageResultFromSiblings( combinedLineageResult.setFiltered( numFiltered + getFiltered(existingResult) + getFiltered(entityLineageResult)); return ValidationUtils.validateEntityLineageResult( - opContext, combinedLineageResult, _entityService); + opContext, combinedLineageResult, _entityService, includeGhostEntities); } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java index 8c7f0e3256cf82..a801cab81c952f 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java @@ -128,6 +128,9 @@ private static void addFilterToQueryBuilder( criterion.getValues()))); orQuery.should(andQuery); } + if (!orQuery.should().isEmpty()) { + orQuery.minimumShouldMatch(1); + } rootQuery.filter(orQuery); } @@ -177,21 +180,26 @@ private SearchResponse executeGroupByLineageSearchQuery( // directions for lineage // set up filters 
for each relationship type in the correct direction to limit buckets BoolQueryBuilder sourceFilterQuery = QueryBuilders.boolQuery(); - sourceFilterQuery.minimumShouldMatch(1); + validEdges.stream() .filter(pair -> RelationshipDirection.OUTGOING.equals(pair.getValue().getDirection())) .forEach( pair -> sourceFilterQuery.should( getAggregationFilter(pair, RelationshipDirection.OUTGOING))); + if (!sourceFilterQuery.should().isEmpty()) { + sourceFilterQuery.minimumShouldMatch(1); + } BoolQueryBuilder destFilterQuery = QueryBuilders.boolQuery(); - destFilterQuery.minimumShouldMatch(1); validEdges.stream() .filter(pair -> RelationshipDirection.INCOMING.equals(pair.getValue().getDirection())) .forEach( pair -> destFilterQuery.should(getAggregationFilter(pair, RelationshipDirection.INCOMING))); + if (!destFilterQuery.should().isEmpty()) { + destFilterQuery.minimumShouldMatch(1); + } FilterAggregationBuilder sourceRelationshipTypeFilters = AggregationBuilders.filter(FILTER_BY_SOURCE_RELATIONSHIP, sourceFilterQuery); @@ -347,6 +355,9 @@ public static BoolQueryBuilder buildQuery( relationshipType -> relationshipQuery.should( QueryBuilders.termQuery(RELATIONSHIP_TYPE, relationshipType))); + if (!relationshipQuery.should().isEmpty()) { + relationshipQuery.minimumShouldMatch(1); + } finalQuery.filter(relationshipQuery); } @@ -697,6 +708,9 @@ public static QueryBuilder getLineageQuery( urns, edgesPerEntityType.get(entityType), graphFilters)); } }); + if (!entityTypeQueries.should().isEmpty()) { + entityTypeQueries.minimumShouldMatch(1); + } BoolQueryBuilder finalQuery = QueryBuilders.boolQuery(); @@ -741,6 +755,10 @@ static QueryBuilder getLineageQueryForEntityType( query.should(getIncomingEdgeQuery(urns, incomingEdges, graphFilters)); } + if (!query.should().isEmpty()) { + query.minimumShouldMatch(1); + } + return query; } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphFilterUtils.java 
b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphFilterUtils.java index 982bcae9b5fd96..b57b5b0b4b5eb2 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphFilterUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphFilterUtils.java @@ -37,11 +37,14 @@ public static QueryBuilder getUrnStatusQuery( if (removed) { finalQuery.filter(QueryBuilders.termQuery(statusField, removed.toString())); } else { - finalQuery.minimumShouldMatch(1); finalQuery.should(QueryBuilders.termQuery(statusField, removed.toString())); finalQuery.should(QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery(statusField))); } + if (!finalQuery.should().isEmpty()) { + finalQuery.minimumShouldMatch(1); + } + return finalQuery; } @@ -102,7 +105,7 @@ public static QueryBuilder getEdgeTimeFilterQuery( * 2. The createdOn and updatedOn window does not exist on the edge at all (support legacy cases) * 3. Special lineage case: The edge is marked as a "manual" edge, meaning that the time filters should NOT be applied. 
*/ - BoolQueryBuilder timeFilterQuery = QueryBuilders.boolQuery(); + BoolQueryBuilder timeFilterQuery = QueryBuilders.boolQuery().minimumShouldMatch(1); timeFilterQuery.should(buildTimeWindowFilter(startTimeMillis, endTimeMillis)); timeFilterQuery.should(buildTimestampsMissingFilter()); timeFilterQuery.should(buildManualLineageFilter()); @@ -158,7 +161,7 @@ public static QueryBuilder getEdgeTimeFilterQuery( */ private static QueryBuilder buildTimeWindowFilter( final long startTimeMillis, final long endTimeMillis) { - final BoolQueryBuilder timeWindowQuery = QueryBuilders.boolQuery(); + final BoolQueryBuilder timeWindowQuery = QueryBuilders.boolQuery().minimumShouldMatch(1); /* * To perform comparison: @@ -198,7 +201,7 @@ private static QueryBuilder buildTimestampsMissingFilter() { private static QueryBuilder buildNotExistsFilter(String fieldName) { // This filter returns 'true' if the field DOES NOT EXIST or it exists but is equal to 0. - final BoolQueryBuilder notExistsFilter = QueryBuilders.boolQuery(); + final BoolQueryBuilder notExistsFilter = QueryBuilders.boolQuery().minimumShouldMatch(1); notExistsFilter.should(QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery(fieldName))); notExistsFilter.should(QueryBuilders.boolQuery().must(QueryBuilders.termQuery(fieldName, 0L))); return notExistsFilter; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java index e66b12db891df8..6001e2f6e660fa 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java @@ -2,6 +2,7 @@ import static com.linkedin.metadata.search.utils.SearchUtils.applyDefaultSearchFlags; +import com.google.common.annotations.VisibleForTesting; import com.linkedin.common.urn.Urn; import 
com.linkedin.metadata.browse.BrowseResult; import com.linkedin.metadata.browse.BrowseResultV2; @@ -30,6 +31,7 @@ import java.util.Optional; import javax.annotation.Nonnull; import javax.annotation.Nullable; +import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.opensearch.action.explain.ExplainResponse; @@ -51,7 +53,7 @@ public class ElasticSearchService implements EntitySearchService, ElasticSearchI private static final int MAX_RUN_IDS_INDEXED = 25; // Save the previous 25 run ids in the index. private final EntityIndexBuilders indexBuilders; - private final ESSearchDAO esSearchDAO; + @VisibleForTesting @Getter private final ESSearchDAO esSearchDAO; private final ESBrowseDAO esBrowseDAO; private final ESWriteDAO esWriteDAO; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java index cec73de7041263..f09a81c0c8b891 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java @@ -6,7 +6,9 @@ import com.codahale.metrics.Timer; import com.datahub.util.exception.ESQueryException; +import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.annotations.VisibleForTesting; import com.linkedin.data.template.LongMap; import com.linkedin.metadata.config.search.SearchConfiguration; @@ -78,6 +80,24 @@ public class ESSearchDAO { @Nonnull private final SearchConfiguration searchConfiguration; @Nullable private final CustomSearchConfiguration customSearchConfiguration; @Nonnull private final QueryFilterRewriteChain queryFilterRewriteChain; + private final boolean testLoggingEnabled; + + public ESSearchDAO( + 
RestHighLevelClient client, + boolean pointInTimeCreationEnabled, + String elasticSearchImplementation, + @Nonnull SearchConfiguration searchConfiguration, + @Nullable CustomSearchConfiguration customSearchConfiguration, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { + this( + client, + pointInTimeCreationEnabled, + elasticSearchImplementation, + searchConfiguration, + customSearchConfiguration, + queryFilterRewriteChain, + false); + } public long docCount(@Nonnull OperationContext opContext, @Nonnull String entityName) { return docCount(opContext, entityName, null); @@ -279,6 +299,11 @@ public SearchResult search( searchRequest.indices( entityNames.stream().map(indexConvention::getEntityIndexName).toArray(String[]::new)); searchRequestTimer.stop(); + + if (testLoggingEnabled) { + testLog(opContext.getObjectMapper(), searchRequest); + } + // Step 2: execute the query and extract results, validated against document model as well return executeAndExtract(opContext, entitySpecs, searchRequest, transformedFilters, from, size); } @@ -478,6 +503,11 @@ public ScrollResult scroll( } scrollRequestTimer.stop(); + + if (testLoggingEnabled) { + testLog(opContext.getObjectMapper(), searchRequest); + } + return executeAndExtract( opContext, entitySpecs, searchRequest, transformedFilters, keepAlive, size); } @@ -605,4 +635,21 @@ public ExplainResponse explain( throw new IllegalStateException("Failed to explain query:", e); } } + + private void testLog(ObjectMapper mapper, SearchRequest searchRequest) { + try { + log.warn("SearchRequest(custom): {}", mapper.writeValueAsString(customSearchConfiguration)); + final String[] indices = searchRequest.indices(); + log.warn( + String.format( + "SearchRequest(indices): %s", + mapper.writerWithDefaultPrettyPrinter().writeValueAsString(indices))); + log.warn( + String.format( + "SearchRequest(query): %s", + mapper.writeValueAsString(mapper.readTree(searchRequest.source().toString())))); + } catch (JsonProcessingException e) { 
+ log.warn("Error writing test log"); + } + } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java index 367705d369c7ce..d545f60a1ee8fa 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java @@ -110,9 +110,12 @@ private BoolQueryBuilder handleNestedFilters( mustNotQueryBuilders.forEach(expandedQueryBuilder::mustNot); expandedQueryBuilder.queryName(boolQueryBuilder.queryName()); expandedQueryBuilder.adjustPureNegative(boolQueryBuilder.adjustPureNegative()); - expandedQueryBuilder.minimumShouldMatch(boolQueryBuilder.minimumShouldMatch()); expandedQueryBuilder.boost(boolQueryBuilder.boost()); + if (!expandedQueryBuilder.should().isEmpty()) { + expandedQueryBuilder.minimumShouldMatch(1); + } + return expandedQueryBuilder; } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java index b7a04f2064d9b4..294efb069a9046 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java @@ -115,8 +115,7 @@ public SearchRequest getSearchRequest( QueryConfiguration customQueryConfig = customizedQueryHandler.lookupQueryConfig(input).orElse(null); - BoolQueryBuilder baseQuery = QueryBuilders.boolQuery(); - baseQuery.minimumShouldMatch(1); + BoolQueryBuilder baseQuery = QueryBuilders.boolQuery().minimumShouldMatch(1); // Initial query with input 
filters BoolQueryBuilder filterQuery = @@ -176,12 +175,15 @@ public BoolQueryBuilder getQuery( BoolQueryBuilder finalQuery = Optional.ofNullable(customAutocompleteConfig) .flatMap(cac -> CustomizedQueryHandler.boolQueryBuilder(objectMapper, cac, query)) - .orElse(QueryBuilders.boolQuery()) - .minimumShouldMatch(1); + .orElse(QueryBuilders.boolQuery()); getAutocompleteQuery(customAutocompleteConfig, autocompleteFields, query) .ifPresent(finalQuery::should); + if (!finalQuery.should().isEmpty()) { + finalQuery.minimumShouldMatch(1); + } + return finalQuery; } @@ -200,8 +202,7 @@ private Optional getAutocompleteQuery( private static BoolQueryBuilder defaultQuery( List autocompleteFields, @Nonnull String query) { - BoolQueryBuilder finalQuery = QueryBuilders.boolQuery(); - finalQuery.minimumShouldMatch(1); + BoolQueryBuilder finalQuery = QueryBuilders.boolQuery().minimumShouldMatch(1); // Search for exact matches with higher boost and ngram matches MultiMatchQueryBuilder autocompleteQueryBuilder = diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java index 3e76d3600d6a68..529c13c7d71ef1 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java @@ -103,8 +103,7 @@ private QueryBuilder buildInternalQuery( cqc -> CustomizedQueryHandler.boolQueryBuilder( opContext.getObjectMapper(), cqc, sanitizedQuery)) - .orElse(QueryBuilders.boolQuery()) - .minimumShouldMatch(1); + .orElse(QueryBuilders.boolQuery()); if (fulltext && !query.startsWith(STRUCTURED_QUERY_PREFIX)) { getSimpleQuery(opContext.getEntityRegistry(), customQueryConfig, entitySpecs, sanitizedQuery) @@ -135,6 +134,10 @@ private QueryBuilder buildInternalQuery( } } + if 
(!finalQuery.should().isEmpty()) { + finalQuery.minimumShouldMatch(1); + } + return finalQuery; } @@ -368,6 +371,10 @@ private Optional getSimpleQuery( simplePerField.should(simpleBuilder); }); + if (!simplePerField.should().isEmpty()) { + simplePerField.minimumShouldMatch(1); + } + result = Optional.of(simplePerField); } @@ -454,7 +461,9 @@ private Optional getPrefixAndExactMatchQuery( } }); - return finalQuery.should().size() > 0 ? Optional.of(finalQuery) : Optional.empty(); + return finalQuery.should().size() > 0 + ? Optional.of(finalQuery.minimumShouldMatch(1)) + : Optional.empty(); } private Optional getStructuredQuery( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index c935e6f54742c3..cb02fb1c8b2f76 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -58,6 +58,7 @@ import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; +import lombok.Getter; import lombok.extern.slf4j.Slf4j; import org.apache.commons.collections.CollectionUtils; import org.opensearch.action.search.SearchRequest; @@ -80,7 +81,7 @@ public class SearchRequestHandler { private static final Map, SearchRequestHandler> REQUEST_HANDLER_BY_ENTITY_NAME = new ConcurrentHashMap<>(); private final List entitySpecs; - private final Set defaultQueryFieldNames; + @Getter private final Set defaultQueryFieldNames; @Nonnull private final HighlightBuilder highlights; private final SearchConfiguration configs; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java 
b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java index b6d9357ecd65e8..4bb8e0630de480 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java @@ -210,8 +210,13 @@ public void setSearchableValue( fieldName, JsonNodeFactory.instance.booleanNode((Boolean) firstValue.orElse(false))); } else { - searchDocument.set( - fieldName, JsonNodeFactory.instance.booleanNode(!fieldValues.isEmpty())); + final boolean hasValue; + if (DataSchema.Type.STRING.equals(valueType)) { + hasValue = firstValue.isPresent() && !String.valueOf(firstValue.get()).isEmpty(); + } else { + hasValue = !fieldValues.isEmpty(); + } + searchDocument.set(fieldName, JsonNodeFactory.instance.booleanNode(hasValue)); } }); @@ -390,7 +395,7 @@ private Optional getNodeForValue( default: String value = fieldValue.toString(); return value.isEmpty() - ? Optional.empty() + ? Optional.of(JsonNodeFactory.instance.nullNode()) : Optional.of(JsonNodeFactory.instance.textNode(value)); } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java index ace7fa2bc197c6..e135f1941bfec9 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java @@ -166,8 +166,6 @@ public static BoolQueryBuilder buildFilterQuery( searchableFieldTypes, opContext, queryFilterRewriteChain))); - // The default is not always 1 (ensure consistent default) - finalQueryBuilder.minimumShouldMatch(1); } else if (filter.getCriteria() != null) { // Otherwise, build boolean query from the deprecated "criteria" field. log.warn("Received query Filter with a deprecated field 'criteria'. 
Use 'or' instead."); @@ -187,7 +185,8 @@ public static BoolQueryBuilder buildFilterQuery( } }); finalQueryBuilder.should(andQueryBuilder); - // The default is not always 1 (ensure consistent default) + } + if (!finalQueryBuilder.should().isEmpty()) { finalQueryBuilder.minimumShouldMatch(1); } return finalQueryBuilder; @@ -533,7 +532,7 @@ private static QueryBuilder getQueryBuilderFromCriterionForFieldToExpand( final Map> searchableFieldTypes, @Nonnull OperationContext opContext, @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { - final BoolQueryBuilder orQueryBuilder = new BoolQueryBuilder(); + final BoolQueryBuilder orQueryBuilder = new BoolQueryBuilder().minimumShouldMatch(1); for (String field : fields) { orQueryBuilder.should( getQueryBuilderFromCriterionForSingleField( @@ -552,6 +551,10 @@ private static QueryBuilder getQueryBuilderFromCriterionForFieldToExpand( return orQueryBuilder; } + private static boolean isCaseInsensitiveSearchEnabled(Condition condition) { + return condition == Condition.IEQUAL; + } + @Nonnull private static QueryBuilder getQueryBuilderFromCriterionForSingleField( @Nonnull Criterion criterion, @@ -564,6 +567,8 @@ private static QueryBuilder getQueryBuilderFromCriterionForSingleField( final AspectRetriever aspectRetriever = opContext.getAspectRetriever(); final String fieldName = toParentField(criterion.getField(), aspectRetriever); + boolean enableCaseInsensitiveSearch; + if (condition == Condition.IS_NULL) { return QueryBuilders.boolQuery() .mustNot(QueryBuilders.existsQuery(fieldName)) @@ -573,9 +578,15 @@ private static QueryBuilder getQueryBuilderFromCriterionForSingleField( .must(QueryBuilders.existsQuery(fieldName)) .queryName(queryName != null ? 
queryName : fieldName); } else if (criterion.hasValues()) { - if (condition == Condition.EQUAL) { + if (condition == Condition.EQUAL || condition == Condition.IEQUAL) { + enableCaseInsensitiveSearch = isCaseInsensitiveSearchEnabled(condition); return buildEqualsConditionFromCriterion( - fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever) + fieldName, + criterion, + isTimeseries, + searchableFieldTypes, + aspectRetriever, + enableCaseInsensitiveSearch) .queryName(queryName != null ? queryName : fieldName); } else if (RANGE_QUERY_CONDITIONS.contains(condition)) { return buildRangeQueryFromCriterion( @@ -596,7 +607,7 @@ private static QueryBuilder getQueryBuilderFromCriterionForSingleField( return buildEndsWithConditionFromCriterion( fieldName, criterion, queryName, isTimeseries, aspectRetriever); } else if (Set.of(ANCESTORS_INCL, DESCENDANTS_INCL, RELATED_INCL).contains(condition)) { - + enableCaseInsensitiveSearch = isCaseInsensitiveSearchEnabled(condition); return QueryFilterRewriterContext.builder() .queryFilterRewriteChain(queryFilterRewriteChain) .condition(condition) @@ -605,7 +616,12 @@ private static QueryBuilder getQueryBuilderFromCriterionForSingleField( .rewrite( opContext, buildEqualsConditionFromCriterion( - fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever)) + fieldName, + criterion, + isTimeseries, + searchableFieldTypes, + aspectRetriever, + enableCaseInsensitiveSearch)) .queryName(queryName != null ? 
queryName : fieldName); } } @@ -619,7 +635,7 @@ private static QueryBuilder buildWildcardQueryWithMultipleValues( @Nullable String queryName, @Nonnull AspectRetriever aspectRetriever, String wildcardPattern) { - BoolQueryBuilder boolQuery = QueryBuilders.boolQuery(); + BoolQueryBuilder boolQuery = QueryBuilders.boolQuery().minimumShouldMatch(1); for (String value : criterion.getValues()) { boolQuery.should( @@ -670,9 +686,15 @@ private static QueryBuilder buildEqualsConditionFromCriterion( @Nonnull final Criterion criterion, final boolean isTimeseries, final Map> searchableFieldTypes, - @Nonnull AspectRetriever aspectRetriever) { + @Nonnull AspectRetriever aspectRetriever, + boolean enableCaseInsensitiveSearch) { return buildEqualsConditionFromCriterionWithValues( - fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever); + fieldName, + criterion, + isTimeseries, + searchableFieldTypes, + aspectRetriever, + enableCaseInsensitiveSearch); } /** @@ -684,7 +706,8 @@ private static QueryBuilder buildEqualsConditionFromCriterionWithValues( @Nonnull final Criterion criterion, final boolean isTimeseries, final Map> searchableFieldTypes, - @Nonnull AspectRetriever aspectRetriever) { + @Nonnull AspectRetriever aspectRetriever, + boolean enableCaseInsensitiveSearch) { Set fieldTypes = getFieldTypes(searchableFieldTypes, fieldName, aspectRetriever); if (fieldTypes.size() > 1) { log.warn( @@ -704,6 +727,21 @@ private static QueryBuilder buildEqualsConditionFromCriterionWithValues( criterion.getValues().stream().map(Double::parseDouble).collect(Collectors.toList()); return QueryBuilders.termsQuery(fieldName, doubleValues).queryName(fieldName); } + + if (enableCaseInsensitiveSearch) { + BoolQueryBuilder boolQuery = QueryBuilders.boolQuery(); + criterion + .getValues() + .forEach( + value -> + boolQuery.should( + QueryBuilders.termQuery( + toKeywordField(criterion.getField(), isTimeseries, aspectRetriever), + value.trim()) + .caseInsensitive(true))); + return 
boolQuery; + } + return QueryBuilders.termsQuery( toKeywordField(criterion.getField(), isTimeseries, aspectRetriever), criterion.getValues()) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGenerator.java b/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGenerator.java index c6a48ea27cbf3f..53f757d8d6c6b1 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGenerator.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGenerator.java @@ -4,6 +4,7 @@ import com.datahub.util.RecordUtils; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.DatasetUrn; import com.linkedin.common.urn.Urn; @@ -173,7 +174,7 @@ private static List getFieldPropertyChangeEvents( SchemaField baseField, SchemaField targetField, Urn datasetUrn, - ChangeCategory changeCategory, + Set changeCategories, AuditStamp auditStamp) { List propChangeEvents = new ArrayList<>(); String datasetFieldUrn; @@ -184,7 +185,7 @@ private static List getFieldPropertyChangeEvents( } // Description Change. - if (ChangeCategory.DOCUMENTATION.equals(changeCategory)) { + if (changeCategories != null && changeCategories.contains(ChangeCategory.DOCUMENTATION)) { ChangeEvent descriptionChangeEvent = getDescriptionChange(baseField, targetField, datasetFieldUrn, auditStamp); if (descriptionChangeEvent != null) { @@ -193,14 +194,14 @@ private static List getFieldPropertyChangeEvents( } // Global Tags - if (ChangeCategory.TAG.equals(changeCategory)) { + if (changeCategories != null && changeCategories.contains(ChangeCategory.TAG)) { propChangeEvents.addAll( getGlobalTagChangeEvents( baseField, targetField, datasetUrn.toString(), datasetFieldUrn, auditStamp)); } // Glossary terms. 
- if (ChangeCategory.GLOSSARY_TERM.equals(changeCategory)) { + if (changeCategories != null && changeCategories.contains(ChangeCategory.GLOSSARY_TERM)) { propChangeEvents.addAll( getGlossaryTermsChangeEvents( baseField, targetField, datasetUrn.toString(), datasetFieldUrn, auditStamp)); @@ -213,7 +214,7 @@ private static List computeDiffs( SchemaMetadata baseSchema, SchemaMetadata targetSchema, Urn datasetUrn, - ChangeCategory changeCategory, + Set changeCategories, AuditStamp auditStamp) { // Sort the fields by their field path. This aligns both sets of fields based on field paths for // comparisons. @@ -247,11 +248,11 @@ private static List computeDiffs( // This is the same field. Check for change events from property changes. if (!curBaseField.getNativeDataType().equals(curTargetField.getNativeDataType())) { processNativeTypeChange( - changeCategory, changeEvents, datasetUrn, curBaseField, curTargetField, auditStamp); + changeCategories, changeEvents, datasetUrn, curBaseField, curTargetField, auditStamp); } List propChangeEvents = getFieldPropertyChangeEvents( - curBaseField, curTargetField, datasetUrn, changeCategory, auditStamp); + curBaseField, curTargetField, datasetUrn, changeCategories, auditStamp); changeEvents.addAll(propChangeEvents); ++baseFieldIdx; ++targetFieldIdx; @@ -268,16 +269,17 @@ private static List computeDiffs( targetFields.subList(targetFieldIdx, targetFields.size()), renamedFields); if (renamedField == null) { - processRemoval(changeCategory, changeEvents, datasetUrn, curBaseField, auditStamp); + processRemoval(changeCategories, changeEvents, datasetUrn, curBaseField, auditStamp); ++baseFieldIdx; } else { - if (ChangeCategory.TECHNICAL_SCHEMA.equals(changeCategory)) { + if (changeCategories != null + && changeCategories.contains(ChangeCategory.TECHNICAL_SCHEMA)) { changeEvents.add( generateRenameEvent(datasetUrn, curBaseField, renamedField, auditStamp)); } List propChangeEvents = getFieldPropertyChangeEvents( - curBaseField, curTargetField, 
datasetUrn, changeCategory, auditStamp); + curBaseField, renamedField, datasetUrn, changeCategories, auditStamp); changeEvents.addAll(propChangeEvents); ++baseFieldIdx; renamedFields.add(renamedField); @@ -289,16 +291,17 @@ private static List computeDiffs( findRenamedField( curTargetField, baseFields.subList(baseFieldIdx, baseFields.size()), renamedFields); if (renamedField == null) { - processAdd(changeCategory, changeEvents, datasetUrn, curTargetField, auditStamp); + processAdd(changeCategories, changeEvents, datasetUrn, curTargetField, auditStamp); ++targetFieldIdx; } else { - if (ChangeCategory.TECHNICAL_SCHEMA.equals(changeCategory)) { + if (changeCategories != null + && changeCategories.contains(ChangeCategory.TECHNICAL_SCHEMA)) { changeEvents.add( generateRenameEvent(datasetUrn, renamedField, curTargetField, auditStamp)); } List propChangeEvents = getFieldPropertyChangeEvents( - curBaseField, curTargetField, datasetUrn, changeCategory, auditStamp); + renamedField, curTargetField, datasetUrn, changeCategories, auditStamp); changeEvents.addAll(propChangeEvents); ++targetFieldIdx; renamedFields.add(renamedField); @@ -309,7 +312,7 @@ private static List computeDiffs( // Handle removed fields. Non-backward compatible change + major version bump SchemaField baseField = baseFields.get(baseFieldIdx); if (!renamedFields.contains(baseField)) { - processRemoval(changeCategory, changeEvents, datasetUrn, baseField, auditStamp); + processRemoval(changeCategories, changeEvents, datasetUrn, baseField, auditStamp); } ++baseFieldIdx; } @@ -317,14 +320,15 @@ private static List computeDiffs( // Newly added fields. Forwards & backwards compatible change + minor version bump. 
SchemaField targetField = targetFields.get(targetFieldIdx); if (!renamedFields.contains(targetField)) { - processAdd(changeCategory, changeEvents, datasetUrn, targetField, auditStamp); + processAdd(changeCategories, changeEvents, datasetUrn, targetField, auditStamp); } ++targetFieldIdx; } // Handle primary key constraint change events. List primaryKeyChangeEvents = - getPrimaryKeyChangeEvents(changeCategory, baseSchema, targetSchema, datasetUrn, auditStamp); + getPrimaryKeyChangeEvents( + changeCategories, baseSchema, targetSchema, datasetUrn, auditStamp); changeEvents.addAll(primaryKeyChangeEvents); // Handle foreign key constraint change events, currently no-op due to field not being utilized. @@ -375,12 +379,12 @@ private static boolean descriptionsMatch(SchemaField curField, SchemaField schem } private static void processRemoval( - ChangeCategory changeCategory, + Set changeCategories, List changeEvents, Urn datasetUrn, SchemaField baseField, AuditStamp auditStamp) { - if (ChangeCategory.TECHNICAL_SCHEMA.equals(changeCategory)) { + if (changeCategories != null && changeCategories.contains(ChangeCategory.TECHNICAL_SCHEMA)) { changeEvents.add( DatasetSchemaFieldChangeEvent.schemaFieldChangeEventBuilder() .modifier(getSchemaFieldUrn(datasetUrn, baseField).toString()) @@ -401,17 +405,17 @@ private static void processRemoval( .build()); } List propChangeEvents = - getFieldPropertyChangeEvents(baseField, null, datasetUrn, changeCategory, auditStamp); + getFieldPropertyChangeEvents(baseField, null, datasetUrn, changeCategories, auditStamp); changeEvents.addAll(propChangeEvents); } private static void processAdd( - ChangeCategory changeCategory, + Set changeCategories, List changeEvents, Urn datasetUrn, SchemaField targetField, AuditStamp auditStamp) { - if (ChangeCategory.TECHNICAL_SCHEMA.equals(changeCategory)) { + if (changeCategories != null && changeCategories.contains(ChangeCategory.TECHNICAL_SCHEMA)) { changeEvents.add( 
DatasetSchemaFieldChangeEvent.schemaFieldChangeEventBuilder() .modifier(getSchemaFieldUrn(datasetUrn, targetField).toString()) @@ -428,22 +432,23 @@ private static void processAdd( .fieldUrn(getSchemaFieldUrn(datasetUrn, targetField)) .nullable(targetField.isNullable()) .auditStamp(auditStamp) + .modificationCategory(SchemaFieldModificationCategory.OTHER) .build()); } List propChangeEvents = - getFieldPropertyChangeEvents(null, targetField, datasetUrn, changeCategory, auditStamp); + getFieldPropertyChangeEvents(null, targetField, datasetUrn, changeCategories, auditStamp); changeEvents.addAll(propChangeEvents); } private static void processNativeTypeChange( - ChangeCategory changeCategory, + Set changeCategories, List changeEvents, Urn datasetUrn, SchemaField curBaseField, SchemaField curTargetField, AuditStamp auditStamp) { // Non-backward compatible change + Major version bump - if (ChangeCategory.TECHNICAL_SCHEMA.equals(changeCategory)) { + if (changeCategories != null && changeCategories.contains(ChangeCategory.TECHNICAL_SCHEMA)) { changeEvents.add( DatasetSchemaFieldChangeEvent.schemaFieldChangeEventBuilder() .category(ChangeCategory.TECHNICAL_SCHEMA) @@ -505,12 +510,12 @@ private static List getForeignKeyChangeEvents() { } private static List getPrimaryKeyChangeEvents( - ChangeCategory changeCategory, + Set changeCategories, SchemaMetadata baseSchema, SchemaMetadata targetSchema, Urn datasetUrn, AuditStamp auditStamp) { - if (ChangeCategory.TECHNICAL_SCHEMA.equals(changeCategory)) { + if (changeCategories != null && changeCategories.contains(ChangeCategory.TECHNICAL_SCHEMA)) { List primaryKeyChangeEvents = new ArrayList<>(); Set basePrimaryKeys = (baseSchema != null && baseSchema.getPrimaryKeys() != null) @@ -598,7 +603,7 @@ public ChangeTransaction getSemanticDiff( baseSchema, targetSchema, DatasetUrn.createFromString(currentValue.getUrn()), - changeCategory, + Collections.singleton(changeCategory), null)); } catch (URISyntaxException e) { throw new 
IllegalArgumentException("Malformed DatasetUrn " + currentValue.getUrn()); @@ -632,18 +637,16 @@ public List getChangeEvents( @Nonnull Aspect from, @Nonnull Aspect to, @Nonnull AuditStamp auditStamp) { - final List changeEvents = new ArrayList<>(); - changeEvents.addAll( - computeDiffs( - from.getValue(), to.getValue(), urn, ChangeCategory.DOCUMENTATION, auditStamp)); - changeEvents.addAll( - computeDiffs(from.getValue(), to.getValue(), urn, ChangeCategory.TAG, auditStamp)); - changeEvents.addAll( + return new ArrayList<>( computeDiffs( - from.getValue(), to.getValue(), urn, ChangeCategory.TECHNICAL_SCHEMA, auditStamp)); - changeEvents.addAll( - computeDiffs( - from.getValue(), to.getValue(), urn, ChangeCategory.GLOSSARY_TERM, auditStamp)); - return changeEvents; + from.getValue(), + to.getValue(), + urn, + ImmutableSet.of( + ChangeCategory.DOCUMENTATION, + ChangeCategory.TAG, + ChangeCategory.TECHNICAL_SCHEMA, + ChangeCategory.GLOSSARY_TERM), + auditStamp)); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffectTest.java b/metadata-io/src/test/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffectTest.java new file mode 100644 index 00000000000000..1151014bf1162f --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffectTest.java @@ -0,0 +1,263 @@ +package com.linkedin.metadata.dataproducts.sideeffects; + +import static com.linkedin.metadata.Constants.DATA_PRODUCT_ENTITY_NAME; +import static com.linkedin.metadata.Constants.DATA_PRODUCT_PROPERTIES_ASPECT_NAME; +import static com.linkedin.metadata.search.utils.QueryUtils.EMPTY_FILTER; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; + +import com.google.common.collect.ImmutableList; +import com.linkedin.common.urn.Urn; +import 
com.linkedin.common.urn.UrnUtils; +import com.linkedin.dataproduct.DataProductAssociation; +import com.linkedin.dataproduct.DataProductAssociationArray; +import com.linkedin.dataproduct.DataProductProperties; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.batch.MCPItem; +import com.linkedin.metadata.aspect.models.graph.RelatedEntities; +import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult; +import com.linkedin.metadata.aspect.patch.GenericJsonPatch; +import com.linkedin.metadata.aspect.patch.PatchOperationType; +import com.linkedin.metadata.aspect.patch.template.dataproduct.DataProductPropertiesTemplate; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.entity.SearchRetriever; +import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; +import com.linkedin.metadata.entity.ebean.batch.MCLItemImpl; +import com.linkedin.metadata.entity.ebean.batch.PatchItemImpl; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.query.filter.RelationshipDirection; +import com.linkedin.metadata.search.utils.QueryUtils; +import com.linkedin.metadata.utils.AuditStampUtils; +import com.linkedin.test.metadata.aspect.TestEntityRegistry; +import io.datahubproject.metadata.context.RetrieverContext; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class DataProductUnsetSideEffectTest { + private static final EntityRegistry TEST_REGISTRY = new TestEntityRegistry(); + private static final List SUPPORTED_CHANGE_TYPES = + List.of( + ChangeType.CREATE, + ChangeType.PATCH, + ChangeType.CREATE_ENTITY, + ChangeType.UPSERT, + ChangeType.DELETE, + 
ChangeType.RESTATE); + private static final Urn TEST_PRODUCT_URN = + UrnUtils.getUrn("urn:li:dataProduct:someDataProductId"); + + private static final Urn TEST_PRODUCT_URN_2 = + UrnUtils.getUrn("urn:li:dataProduct:someOtherDataProductId"); + + private static final Urn DATASET_URN_1 = + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)"); + private static final Urn DATASET_URN_2 = + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_deleted,PROD)"); + private static final AspectPluginConfig TEST_PLUGIN_CONFIG = + AspectPluginConfig.builder() + .className(DataProductUnsetSideEffect.class.getName()) + .enabled(true) + .supportedOperations( + SUPPORTED_CHANGE_TYPES.stream() + .map(ChangeType::toString) + .collect(Collectors.toList())) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName(DATA_PRODUCT_ENTITY_NAME) + .aspectName(DATA_PRODUCT_PROPERTIES_ASPECT_NAME) + .build())) + .build(); + + private AspectRetriever mockAspectRetriever; + private RetrieverContext retrieverContext; + + @BeforeMethod + public void setup() { + mockAspectRetriever = mock(AspectRetriever.class); + when(mockAspectRetriever.getEntityRegistry()).thenReturn(TEST_REGISTRY); + GraphRetriever graphRetriever = mock(GraphRetriever.class); + RelatedEntities relatedEntities = + new RelatedEntities( + "DataProductContains", + TEST_PRODUCT_URN.toString(), + DATASET_URN_1.toString(), + RelationshipDirection.INCOMING, + null); + + List relatedEntitiesList = new ArrayList<>(); + relatedEntitiesList.add(relatedEntities); + RelatedEntitiesScrollResult relatedEntitiesScrollResult = + new RelatedEntitiesScrollResult(1, 10, null, relatedEntitiesList); + when(graphRetriever.scrollRelatedEntities( + eq(null), + eq(QueryUtils.newFilter("urn", DATASET_URN_1.toString())), + eq(null), + eq(EMPTY_FILTER), + eq(ImmutableList.of("DataProductContains")), + eq(QueryUtils.newRelationshipFilter(EMPTY_FILTER, 
RelationshipDirection.INCOMING)), + eq(Collections.emptyList()), + eq(null), + eq(10), // Should only ever be one, if ever greater than ten will decrease over time to + // become consistent + eq(null), + eq(null))) + .thenReturn(relatedEntitiesScrollResult); + + RelatedEntities relatedEntities2 = + new RelatedEntities( + "DataProductContains", + TEST_PRODUCT_URN_2.toString(), + DATASET_URN_2.toString(), + RelationshipDirection.INCOMING, + null); + + List relatedEntitiesList2 = new ArrayList<>(); + relatedEntitiesList2.add(relatedEntities2); + RelatedEntitiesScrollResult relatedEntitiesScrollResult2 = + new RelatedEntitiesScrollResult(1, 10, null, relatedEntitiesList2); + when(graphRetriever.scrollRelatedEntities( + eq(null), + eq(QueryUtils.newFilter("urn", DATASET_URN_2.toString())), + eq(null), + eq(EMPTY_FILTER), + eq(ImmutableList.of("DataProductContains")), + eq(QueryUtils.newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING)), + eq(Collections.emptyList()), + eq(null), + eq(10), // Should only ever be one, if ever greater than ten will decrease over time to + // become consistent + eq(null), + eq(null))) + .thenReturn(relatedEntitiesScrollResult2); + retrieverContext = + RetrieverContext.builder() + .searchRetriever(mock(SearchRetriever.class)) + .aspectRetriever(mockAspectRetriever) + .graphRetriever(graphRetriever) + .build(); + } + + @Test + public void testDPAlreadySetToSame() { + DataProductUnsetSideEffect test = new DataProductUnsetSideEffect(); + test.setConfig(TEST_PLUGIN_CONFIG); + + DataProductProperties dataProductProperties = getTestDataProductProperties(DATASET_URN_1); + + List testOutput; + // Run test + ChangeItemImpl dataProductPropertiesChangeItem = + ChangeItemImpl.builder() + .urn(TEST_PRODUCT_URN) + .aspectName(DATA_PRODUCT_PROPERTIES_ASPECT_NAME) + .changeType(ChangeType.UPSERT) + .entitySpec(TEST_REGISTRY.getEntitySpec(DATA_PRODUCT_ENTITY_NAME)) + .aspectSpec( + TEST_REGISTRY + .getEntitySpec(DATA_PRODUCT_ENTITY_NAME) + 
.getAspectSpec(DATA_PRODUCT_PROPERTIES_ASPECT_NAME)) + .recordTemplate(dataProductProperties) + .auditStamp(AuditStampUtils.createDefaultAuditStamp()) + .build(mockAspectRetriever); + testOutput = + test.postMCPSideEffect( + List.of( + MCLItemImpl.builder() + .build( + dataProductPropertiesChangeItem, + null, + null, + retrieverContext.getAspectRetriever())), + retrieverContext) + .toList(); + + // Verify test + assertEquals(testOutput.size(), 0, "Expected no additional changes: " + testOutput); + } + + @Test + public void testDPRemoveOld() { + DataProductUnsetSideEffect test = new DataProductUnsetSideEffect(); + test.setConfig(TEST_PLUGIN_CONFIG); + + DataProductProperties dataProductProperties = getTestDataProductProperties(DATASET_URN_2); + + List testOutput; + // Run test + ChangeItemImpl dataProductPropertiesChangeItem = + ChangeItemImpl.builder() + .urn(TEST_PRODUCT_URN) + .aspectName(DATA_PRODUCT_PROPERTIES_ASPECT_NAME) + .changeType(ChangeType.UPSERT) + .entitySpec(TEST_REGISTRY.getEntitySpec(DATA_PRODUCT_ENTITY_NAME)) + .aspectSpec( + TEST_REGISTRY + .getEntitySpec(DATA_PRODUCT_ENTITY_NAME) + .getAspectSpec(DATA_PRODUCT_PROPERTIES_ASPECT_NAME)) + .recordTemplate(dataProductProperties) + .auditStamp(AuditStampUtils.createDefaultAuditStamp()) + .build(mockAspectRetriever); + testOutput = + test.postMCPSideEffect( + List.of( + MCLItemImpl.builder() + .build( + dataProductPropertiesChangeItem, + null, + null, + retrieverContext.getAspectRetriever())), + retrieverContext) + .toList(); + + // Verify test + assertEquals(testOutput.size(), 1, "Expected removal of previous data product: " + testOutput); + + GenericJsonPatch.PatchOp patchOp = new GenericJsonPatch.PatchOp(); + patchOp.setOp(PatchOperationType.REMOVE.getValue()); + patchOp.setPath(String.format("/assets/%s", DATASET_URN_2)); + + assertEquals( + testOutput, + List.of( + PatchItemImpl.builder() + .urn(TEST_PRODUCT_URN_2) + .aspectName(DATA_PRODUCT_PROPERTIES_ASPECT_NAME) + .patch( + 
GenericJsonPatch.builder() + .arrayPrimaryKeys( + Map.of( + DataProductPropertiesTemplate.ASSETS_FIELD_NAME, + List.of(DataProductPropertiesTemplate.KEY_FIELD_NAME))) + .patch(List.of(patchOp)) + .build() + .getJsonPatch()) + .entitySpec(TEST_REGISTRY.getEntitySpec(DATA_PRODUCT_ENTITY_NAME)) + .aspectSpec( + TEST_REGISTRY + .getEntitySpec(DATA_PRODUCT_ENTITY_NAME) + .getAspectSpec(DATA_PRODUCT_PROPERTIES_ASPECT_NAME)) + .auditStamp(dataProductPropertiesChangeItem.getAuditStamp()) + .systemMetadata(dataProductPropertiesChangeItem.getSystemMetadata()) + .build(mockAspectRetriever.getEntityRegistry()))); + } + + private static DataProductProperties getTestDataProductProperties(Urn destinationUrn) { + DataProductProperties dataProductProperties = new DataProductProperties(); + DataProductAssociationArray dataProductAssociations = new DataProductAssociationArray(); + DataProductAssociation dataProductAssociation1 = new DataProductAssociation(); + dataProductAssociation1.setDestinationUrn(destinationUrn); + dataProductAssociations.add(dataProductAssociation1); + dataProductProperties.setAssets(dataProductAssociations); + return dataProductProperties; + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java index d585ff1ce8383f..0e8ee08e60739f 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java @@ -2,7 +2,9 @@ import static com.linkedin.metadata.search.utils.QueryUtils.*; import static org.mockito.Mockito.*; -import static org.testng.AssertJUnit.*; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; import com.datahub.util.RecordUtils; import com.google.common.collect.ImmutableList; diff --git 
a/metadata-io/src/test/java/com/linkedin/metadata/entity/ebean/EbeanAspectDaoTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/ebean/EbeanAspectDaoTest.java index 43123fb9872a0f..109c9b5c44efb9 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/ebean/EbeanAspectDaoTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/ebean/EbeanAspectDaoTest.java @@ -4,6 +4,8 @@ import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; import com.linkedin.metadata.EbeanTestUtils; import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.config.EbeanConfiguration; @@ -41,15 +43,15 @@ public void testGetNextVersionForUpdate() { // Get the captured SQL statements List sql = LoggedSql.stop().stream() - .filter(str -> !str.contains("INFORMATION_SCHEMA.TABLES")) + .filter(str -> str.contains("(t0.urn,t0.aspect,t0.version)")) .toList(); - assertEquals(sql.size(), 2, String.format("Found: %s", sql)); + assertEquals(sql.size(), 1, String.format("Found: %s", sql)); assertTrue( sql.get(0).contains("for update;"), String.format("Did not find `for update` in %s ", sql)); } @Test - public void testGetLatestAspectsForUpdate() { + public void testGetLatestAspectsForUpdate() throws JsonProcessingException { LoggedSql.start(); testDao.runInTransactionWithRetryUnlocked( @@ -63,9 +65,10 @@ public void testGetLatestAspectsForUpdate() { // Get the captured SQL statements List sql = LoggedSql.stop().stream() - .filter(str -> !str.contains("INFORMATION_SCHEMA.TABLES")) + .filter(str -> str.contains("(t0.urn,t0.aspect,t0.version)")) .toList(); - assertEquals(sql.size(), 1, String.format("Found: %s", sql)); + assertEquals( + sql.size(), 1, String.format("Found: %s", new ObjectMapper().writeValueAsString(sql))); assertTrue( sql.get(0).contains("for update;"), String.format("Did not find 
`for update` in %s ", sql)); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/extractor/AspectExtractorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/extractor/AspectExtractorTest.java index a98386f6f871b0..f8bf2376e6bc12 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/extractor/AspectExtractorTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/extractor/AspectExtractorTest.java @@ -1,6 +1,6 @@ package com.linkedin.metadata.extractor; -import static org.testng.AssertJUnit.assertEquals; +import static org.testng.Assert.assertEquals; import com.datahub.test.TestEntityAspect; import com.datahub.test.TestEntityAspectArray; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/elasticsearch/SearchGraphServiceElasticSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/elasticsearch/SearchGraphServiceElasticSearchTest.java index b2c49857cb0b96..8a0dfcbe34a69e 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/elasticsearch/SearchGraphServiceElasticSearchTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/elasticsearch/SearchGraphServiceElasticSearchTest.java @@ -1,5 +1,7 @@ package com.linkedin.metadata.graph.search.elasticsearch; +import static org.testng.Assert.assertNotNull; + import com.linkedin.metadata.graph.search.SearchGraphServiceTestBase; import com.linkedin.metadata.search.elasticsearch.ElasticSearchSuite; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; @@ -9,7 +11,6 @@ import org.opensearch.client.RestHighLevelClient; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Import; -import org.testng.AssertJUnit; import org.testng.annotations.Test; @Import({ElasticSearchSuite.class, SearchTestContainerConfiguration.class}) @@ -39,6 +40,6 @@ protected ESIndexBuilder getIndexBuilder() { @Test public void initTest() { - 
AssertJUnit.assertNotNull(_searchClient); + assertNotNull(_searchClient); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/opensearch/SearchGraphServiceOpenSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/opensearch/SearchGraphServiceOpenSearchTest.java index 28b545f8175391..08a6ea4ef2c9c7 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/opensearch/SearchGraphServiceOpenSearchTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/opensearch/SearchGraphServiceOpenSearchTest.java @@ -1,5 +1,7 @@ package com.linkedin.metadata.graph.search.opensearch; +import static org.testng.Assert.assertNotNull; + import com.linkedin.metadata.graph.search.SearchGraphServiceTestBase; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; @@ -9,7 +11,6 @@ import org.opensearch.client.RestHighLevelClient; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Import; -import org.testng.AssertJUnit; import org.testng.annotations.Test; @Import({OpenSearchSuite.class, SearchTestContainerConfiguration.class}) @@ -39,6 +40,6 @@ protected ESIndexBuilder getIndexBuilder() { @Test public void initTest() { - AssertJUnit.assertNotNull(_searchClient); + assertNotNull(_searchClient); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/sibling/SiblingGraphServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/sibling/SiblingGraphServiceTest.java index 15165f59deb160..a61ea1f2562b0c 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/sibling/SiblingGraphServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/sibling/SiblingGraphServiceTest.java @@ -33,6 +33,7 @@ import javax.annotation.Nonnull; import org.mockito.Mockito; import org.testng.annotations.BeforeClass; +import 
org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; public class SiblingGraphServiceTest { @@ -73,9 +74,6 @@ public class SiblingGraphServiceTest { @BeforeClass public void setup() { _mockEntityService = Mockito.mock(EntityService.class); - when(_mockEntityService.exists( - any(OperationContext.class), any(Collection.class), any(Boolean.class))) - .thenAnswer(args -> new HashSet<>(args.getArgument(1))); EntityRegistry entityRegistry = new ConfigEntityRegistry( Snapshot.class.getClassLoader().getResourceAsStream("entity-registry.yml")); @@ -84,34 +82,16 @@ public void setup() { _client = new SiblingGraphService(_mockEntityService, _graphService); } + @BeforeMethod + public void init() { + when(_mockEntityService.exists( + any(OperationContext.class), any(Collection.class), any(Boolean.class))) + .thenAnswer(args -> new HashSet<>(args.getArgument(1))); + } + @Test public void testNoSiblingMetadata() { - EntityLineageResult mockResult = new EntityLineageResult(); - LineageRelationshipArray relationships = new LineageRelationshipArray(); - LineageRelationship relationship1 = new LineageRelationship(); - relationship1.setDegree(0); - relationship1.setType(downstreamOf); - relationship1.setEntity(datasetOneUrn); - - LineageRelationship relationship2 = new LineageRelationship(); - relationship2.setDegree(0); - relationship2.setType(downstreamOf); - relationship2.setEntity(datasetTwoUrn); - - LineageRelationship relationship3 = new LineageRelationship(); - relationship3.setDegree(0); - relationship3.setType(downstreamOf); - relationship3.setEntity(datasetThreeUrn); - - relationships.add(relationship1); - relationships.add(relationship2); - relationships.add(relationship3); - - mockResult.setStart(0); - mockResult.setTotal(200); - mockResult.setCount(3); - mockResult.setFiltered(0); - mockResult.setRelationships(relationships); + EntityLineageResult mockResult = makeBasicMockResult(); when(_graphService.getLineage( any(OperationContext.class), @@ 
-137,35 +117,9 @@ public void testNoSiblingMetadata() { @Test public void testNoSiblingInResults() { - EntityLineageResult mockResult = new EntityLineageResult(); + EntityLineageResult mockResult = makeBasicMockResult(); EntityLineageResult siblingMockResult = new EntityLineageResult(); - LineageRelationshipArray relationships = new LineageRelationshipArray(); - LineageRelationship relationship1 = new LineageRelationship(); - relationship1.setDegree(0); - relationship1.setType(downstreamOf); - relationship1.setEntity(datasetOneUrn); - - LineageRelationship relationship2 = new LineageRelationship(); - relationship2.setDegree(0); - relationship2.setType(downstreamOf); - relationship2.setEntity(datasetTwoUrn); - - LineageRelationship relationship3 = new LineageRelationship(); - relationship3.setDegree(0); - relationship3.setType(downstreamOf); - relationship3.setEntity(datasetThreeUrn); - - relationships.add(relationship1); - relationships.add(relationship2); - relationships.add(relationship3); - - mockResult.setStart(0); - mockResult.setTotal(200); - mockResult.setCount(3); - mockResult.setFiltered(0); - mockResult.setRelationships(relationships); - when(_graphService.getLineage( any(OperationContext.class), eq(datasetFourUrn), @@ -229,34 +183,9 @@ public void testNoSiblingInResults() { @Test public void testSiblingInResult() throws Exception { - EntityLineageResult mockResult = new EntityLineageResult(); + EntityLineageResult mockResult = makeBasicMockResult(); EntityLineageResult siblingMockResult = new EntityLineageResult(); - LineageRelationshipArray relationships = new LineageRelationshipArray(); - LineageRelationship relationship1 = new LineageRelationship(); - relationship1.setDegree(0); - relationship1.setType(downstreamOf); - relationship1.setEntity(datasetOneUrn); - - LineageRelationship relationship2 = new LineageRelationship(); - relationship2.setDegree(0); - relationship2.setType(downstreamOf); - relationship2.setEntity(datasetTwoUrn); - - 
LineageRelationship relationship3 = new LineageRelationship(); - relationship3.setDegree(0); - relationship3.setType(downstreamOf); - relationship3.setEntity(datasetThreeUrn); - - relationships.add(relationship1); - relationships.add(relationship2); - relationships.add(relationship3); - - mockResult.setStart(0); - mockResult.setTotal(3); - mockResult.setCount(3); - mockResult.setRelationships(relationships); - siblingMockResult.setStart(0); siblingMockResult.setTotal(0); siblingMockResult.setCount(0); @@ -315,7 +244,9 @@ public void testSiblingInResult() throws Exception { expectedResult.setTotal(3); expectedResult.setCount(2); expectedResult.setFiltered(1); - expectedResult.setRelationships(new LineageRelationshipArray(relationship1, relationship2)); + expectedResult.setRelationships( + new LineageRelationshipArray( + makeBasicRelationship(datasetOneUrn), makeBasicRelationship(datasetTwoUrn))); EntityLineageResult upstreamLineage = service.getLineage(opContext, datasetFourUrn, LineageDirection.UPSTREAM, 0, 100, 1); @@ -335,25 +266,9 @@ public void testCombineSiblingResult() { LineageRelationshipArray siblingRelationships = new LineageRelationshipArray(); LineageRelationshipArray expectedRelationships = new LineageRelationshipArray(); - LineageRelationship relationship1 = new LineageRelationship(); - relationship1.setDegree(0); - relationship1.setType(downstreamOf); - relationship1.setEntity(datasetOneUrn); - - LineageRelationship relationship2 = new LineageRelationship(); - relationship2.setDegree(0); - relationship2.setType(downstreamOf); - relationship2.setEntity(datasetTwoUrn); - - LineageRelationship relationship3 = new LineageRelationship(); - relationship3.setDegree(0); - relationship3.setType(downstreamOf); - relationship3.setEntity(datasetThreeUrn); - - LineageRelationship relationship4 = new LineageRelationship(); - relationship4.setDegree(0); - relationship4.setType(downstreamOf); - relationship4.setEntity(datasetFiveUrn); + LineageRelationship 
relationship1 = makeBasicRelationship(datasetOneUrn); + LineageRelationship relationship2 = makeBasicRelationship(datasetTwoUrn); + LineageRelationship relationship4 = makeBasicRelationship(datasetFiveUrn); relationships.add(relationship1); @@ -449,25 +364,9 @@ public void testUpstreamOfSiblings() { LineageRelationshipArray siblingRelationships = new LineageRelationshipArray(); LineageRelationshipArray expectedRelationships = new LineageRelationshipArray(); - LineageRelationship relationship1 = new LineageRelationship(); - relationship1.setDegree(0); - relationship1.setType(downstreamOf); - relationship1.setEntity(datasetOneUrn); - - LineageRelationship relationship2 = new LineageRelationship(); - relationship2.setDegree(0); - relationship2.setType(downstreamOf); - relationship2.setEntity(datasetTwoUrn); - - LineageRelationship relationship3 = new LineageRelationship(); - relationship3.setDegree(0); - relationship3.setType(downstreamOf); - relationship3.setEntity(datasetThreeUrn); - - LineageRelationship relationship5 = new LineageRelationship(); - relationship5.setDegree(0); - relationship5.setType(downstreamOf); - relationship5.setEntity(datasetFiveUrn); + LineageRelationship relationship1 = makeBasicRelationship(datasetOneUrn); + LineageRelationship relationship2 = makeBasicRelationship(datasetTwoUrn); + LineageRelationship relationship5 = makeBasicRelationship(datasetFiveUrn); relationships.add(relationship1); @@ -607,11 +506,7 @@ public void testUpstreamOfSiblingSiblings() { LineageRelationshipArray relationships = new LineageRelationshipArray(); LineageRelationshipArray expectedRelationships = new LineageRelationshipArray(); - LineageRelationship relationship = new LineageRelationship(); - relationship.setDegree(0); - relationship.setType(downstreamOf); - relationship.setEntity(datasetFourUrn); - + LineageRelationship relationship = makeBasicRelationship(datasetFourUrn); relationships.add(relationship); expectedRelationships.add(relationship); @@ -722,25 
+617,10 @@ public void testRelationshipWithSibling() throws CloneNotSupportedException { LineageRelationshipArray siblingRelationships = new LineageRelationshipArray(); LineageRelationshipArray expectedRelationships = new LineageRelationshipArray(); - LineageRelationship relationship1 = new LineageRelationship(); - relationship1.setDegree(0); - relationship1.setType(downstreamOf); - relationship1.setEntity(datasetOneUrn); - - LineageRelationship relationship2 = new LineageRelationship(); - relationship2.setDegree(0); - relationship2.setType(downstreamOf); - relationship2.setEntity(datasetTwoUrn); - - LineageRelationship relationship3 = new LineageRelationship(); - relationship3.setDegree(0); - relationship3.setType(downstreamOf); - relationship3.setEntity(datasetThreeUrn); - - LineageRelationship relationship5 = new LineageRelationship(); - relationship5.setDegree(0); - relationship5.setType(downstreamOf); - relationship5.setEntity(datasetFiveUrn); + LineageRelationship relationship1 = makeBasicRelationship(datasetOneUrn); + LineageRelationship relationship2 = makeBasicRelationship(datasetTwoUrn); + LineageRelationship relationship3 = makeBasicRelationship(datasetThreeUrn); + LineageRelationship relationship5 = makeBasicRelationship(datasetFiveUrn); relationships.add(relationship1); // relationship between entity and its sibling @@ -1006,7 +886,7 @@ public void testSiblingCombinations() throws URISyntaxException { // Tests for separateSiblings = true: primary sibling EntityLineageResult primaryDownstreamSeparated = service.getLineage( - opContext, primarySiblingUrn, LineageDirection.DOWNSTREAM, 0, 100, 1, true, Set.of()); + opContext, primarySiblingUrn, LineageDirection.DOWNSTREAM, 0, 100, 1, true, false); LineageRelationshipArray expectedRelationships = new LineageRelationshipArray(); expectedRelationships.add(relationship); @@ -1022,7 +902,7 @@ public void testSiblingCombinations() throws URISyntaxException { EntityLineageResult primaryUpstreamSeparated = 
service.getLineage( - opContext, primarySiblingUrn, LineageDirection.UPSTREAM, 0, 100, 1, true, Set.of()); + opContext, primarySiblingUrn, LineageDirection.UPSTREAM, 0, 100, 1, true, false); EntityLineageResult expectedResultPrimaryUpstreamSeparated = new EntityLineageResult(); expectedResultPrimaryUpstreamSeparated.setCount(2); expectedResultPrimaryUpstreamSeparated.setStart(0); @@ -1035,7 +915,7 @@ public void testSiblingCombinations() throws URISyntaxException { // Test for separateSiblings = true, secondary sibling EntityLineageResult secondarySiblingSeparated = service.getLineage( - opContext, alternateSiblingUrn, LineageDirection.DOWNSTREAM, 0, 100, 1, true, Set.of()); + opContext, alternateSiblingUrn, LineageDirection.DOWNSTREAM, 0, 100, 1, true, false); EntityLineageResult expectedResultSecondarySeparated = new EntityLineageResult(); expectedResultSecondarySeparated.setCount(numDownstreams); @@ -1048,7 +928,7 @@ public void testSiblingCombinations() throws URISyntaxException { EntityLineageResult secondaryUpstreamSeparated = service.getLineage( - opContext, alternateSiblingUrn, LineageDirection.UPSTREAM, 0, 100, 1, true, Set.of()); + opContext, alternateSiblingUrn, LineageDirection.UPSTREAM, 0, 100, 1, true, false); EntityLineageResult expectedResultSecondaryUpstreamSeparated = new EntityLineageResult(); expectedResultSecondaryUpstreamSeparated.setCount(3); expectedResultSecondaryUpstreamSeparated.setStart(0); @@ -1060,15 +940,7 @@ public void testSiblingCombinations() throws URISyntaxException { // Test for separateSiblings = false, primary sibling EntityLineageResult primarySiblingNonSeparated = - service.getLineage( - opContext, - primarySiblingUrn, - LineageDirection.DOWNSTREAM, - 0, - 100, - 1, - false, - new HashSet<>()); + service.getLineage(opContext, primarySiblingUrn, LineageDirection.DOWNSTREAM, 0, 100, 1); EntityLineageResult expectedResultPrimaryNonSeparated = new EntityLineageResult(); 
expectedResultPrimaryNonSeparated.setCount(numDownstreams); expectedResultPrimaryNonSeparated.setStart(0); @@ -1078,15 +950,7 @@ public void testSiblingCombinations() throws URISyntaxException { assertEquals(primarySiblingNonSeparated, expectedResultPrimaryNonSeparated); EntityLineageResult primarySiblingNonSeparatedUpstream = - service.getLineage( - opContext, - primarySiblingUrn, - LineageDirection.UPSTREAM, - 0, - 100, - 1, - false, - new HashSet<>()); + service.getLineage(opContext, primarySiblingUrn, LineageDirection.UPSTREAM, 0, 100, 1); EntityLineageResult expectedResultPrimaryUpstreamNonSeparated = new EntityLineageResult(); expectedResultPrimaryUpstreamNonSeparated.setCount(2); expectedResultPrimaryUpstreamNonSeparated.setStart(0); @@ -1097,30 +961,84 @@ public void testSiblingCombinations() throws URISyntaxException { // Test for separateSiblings = false, secondary sibling EntityLineageResult secondarySiblingNonSeparated = - service.getLineage( - opContext, - alternateSiblingUrn, - LineageDirection.DOWNSTREAM, - 0, - 100, - 1, - false, - new HashSet<>()); + service.getLineage(opContext, alternateSiblingUrn, LineageDirection.DOWNSTREAM, 0, 100, 1); assertEquals(secondarySiblingNonSeparated, expectedResultPrimaryNonSeparated); EntityLineageResult secondarySiblingNonSeparatedUpstream = - service.getLineage( - opContext, - alternateSiblingUrn, - LineageDirection.UPSTREAM, - 0, - 100, - 1, - false, - new HashSet<>()); + service.getLineage(opContext, alternateSiblingUrn, LineageDirection.UPSTREAM, 0, 100, 1); assertEquals(secondarySiblingNonSeparatedUpstream, expectedResultPrimaryUpstreamNonSeparated); } + @Test + public void testExcludeGhostEntities() { + when(_mockEntityService.exists(any(OperationContext.class), any(Collection.class), eq(false))) + .thenAnswer(args -> Set.of(datasetOneUrn)); + + EntityLineageResult mockGraphResult = makeBasicMockResult(); + + when(_graphService.getLineage( + any(OperationContext.class), + eq(datasetFourUrn), + 
eq(LineageDirection.UPSTREAM), + eq(0), + eq(100), + eq(1))) + .thenReturn(mockGraphResult); + + when(_mockEntityService.getLatestAspect( + any(OperationContext.class), eq(datasetFourUrn), eq(SIBLINGS_ASPECT_NAME))) + .thenReturn(null); + + SiblingGraphService service = _client; + + EntityLineageResult upstreamLineage = + service.getLineage(opContext, datasetFourUrn, LineageDirection.UPSTREAM, 0, 100, 1); + + EntityLineageResult mockResult = new EntityLineageResult(); + mockResult.setStart(0); + mockResult.setTotal(3); + mockResult.setCount(3); + mockResult.setFiltered(2); + LineageRelationshipArray relationshipsResult = new LineageRelationshipArray(); + relationshipsResult.add(makeBasicRelationship(datasetOneUrn)); + mockResult.setRelationships(relationshipsResult); + + // assert sibling graph service filters out entities that do not exist + assertEquals(upstreamLineage, mockResult); + } + + @Test + public void testIncludeGhostEntities() { + when(_mockEntityService.exists( + any(OperationContext.class), any(Collection.class), any(Boolean.class))) + .thenAnswer(args -> Set.of(datasetOneUrn)); + + EntityLineageResult mockResult = makeBasicMockResult(); + + when(_graphService.getLineage( + any(OperationContext.class), + eq(datasetFourUrn), + eq(LineageDirection.UPSTREAM), + eq(0), + eq(100), + eq(1))) + .thenReturn(mockResult); + + when(_mockEntityService.getLatestAspect( + any(OperationContext.class), eq(datasetFourUrn), eq(SIBLINGS_ASPECT_NAME))) + .thenReturn(null); + + SiblingGraphService service = _client; + + EntityLineageResult upstreamLineage = + service.getLineage( + opContext, datasetFourUrn, LineageDirection.UPSTREAM, 0, 100, 1, false, true); + + // assert sibling graph service is a pass through when there are no siblings and + // includeGhostEntities + assertEquals(upstreamLineage, mockResult); + } + static Urn createFromString(@Nonnull String rawUrn) { try { return Urn.createFromString(rawUrn); @@ -1128,4 +1046,29 @@ static Urn createFromString(@Nonnull 
String rawUrn) { return null; } } + + static LineageRelationship makeBasicRelationship(Urn urn) { + LineageRelationship relationship = new LineageRelationship(); + relationship.setDegree(0); + relationship.setType(downstreamOf); + relationship.setEntity(urn); + return relationship; + } + + static EntityLineageResult makeBasicMockResult() { + LineageRelationshipArray relationships = new LineageRelationshipArray(); + LineageRelationship relationship1 = makeBasicRelationship(datasetOneUrn); + LineageRelationship relationship2 = makeBasicRelationship(datasetTwoUrn); + LineageRelationship relationship3 = makeBasicRelationship(datasetThreeUrn); + relationships.addAll(List.of(relationship1, relationship2, relationship3)); + + EntityLineageResult mockResult = new EntityLineageResult(); + mockResult.setStart(0); + mockResult.setTotal(3); + mockResult.setCount(3); + mockResult.setFiltered(0); + mockResult.setRelationships(relationships); + + return mockResult; + } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageSearchResultCacheKeyTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageSearchResultCacheKeyTest.java index 1d4a545fc06a2b..d2d27bc2de27c5 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageSearchResultCacheKeyTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageSearchResultCacheKeyTest.java @@ -1,7 +1,7 @@ package com.linkedin.metadata.search; -import static org.testng.AssertJUnit.assertEquals; -import static org.testng.AssertJUnit.assertNotSame; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotSame; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; import org.testng.annotations.Test; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java index d9268c1b50efeb..39fb6001eeb952 
100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java @@ -35,7 +35,6 @@ import com.linkedin.metadata.config.cache.SearchCacheConfiguration; import com.linkedin.metadata.config.cache.SearchLineageCacheConfiguration; import com.linkedin.metadata.config.search.SearchConfiguration; -import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.graph.EntityLineageResult; import com.linkedin.metadata.graph.GraphService; import com.linkedin.metadata.graph.LineageDirection; @@ -103,9 +102,6 @@ public abstract class LineageServiceTestBase extends AbstractTestNGSpringContext @Nonnull protected abstract SearchConfiguration getSearchConfiguration(); - @Nonnull - protected abstract CustomSearchConfiguration getCustomSearchConfiguration(); - private SettingsBuilder settingsBuilder; private ElasticSearchService elasticSearchService; private GraphService graphService; @@ -211,10 +207,7 @@ private ElasticSearchService buildEntitySearchService() { QueryFilterRewriteChain.EMPTY); ESBrowseDAO browseDAO = new ESBrowseDAO( - searchClientSpy, - getSearchConfiguration(), - getCustomSearchConfiguration(), - QueryFilterRewriteChain.EMPTY); + searchClientSpy, getSearchConfiguration(), null, QueryFilterRewriteChain.EMPTY); ESWriteDAO writeDAO = new ESWriteDAO(searchClientSpy, getBulkProcessor(), 1); return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java index ba83a381916c29..b20326deeb9458 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java @@ -14,7 +14,6 @@ import 
com.linkedin.common.urn.Urn; import com.linkedin.metadata.config.cache.EntityDocCountCacheConfiguration; import com.linkedin.metadata.config.search.SearchConfiguration; -import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.models.registry.SnapshotEntityRegistry; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; @@ -64,9 +63,6 @@ public abstract class SearchServiceTestBase extends AbstractTestNGSpringContextT @Nonnull protected abstract SearchConfiguration getSearchConfiguration(); - @Nonnull - protected abstract CustomSearchConfiguration getCustomSearchConfiguration(); - protected OperationContext operationContext; private SettingsBuilder settingsBuilder; private ElasticSearchService elasticSearchService; @@ -136,10 +132,7 @@ private ElasticSearchService buildEntitySearchService() { QueryFilterRewriteChain.EMPTY); ESBrowseDAO browseDAO = new ESBrowseDAO( - getSearchClient(), - getSearchConfiguration(), - getCustomSearchConfiguration(), - QueryFilterRewriteChain.EMPTY); + getSearchClient(), getSearchConfiguration(), null, QueryFilterRewriteChain.EMPTY); ESWriteDAO writeDAO = new ESWriteDAO(getSearchClient(), getBulkProcessor(), 1); return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java index 7b6fcd46333d2d..206b97ce6c1045 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java @@ -13,7 +13,6 @@ import com.linkedin.metadata.browse.BrowseResult; import com.linkedin.metadata.browse.BrowseResultV2; import com.linkedin.metadata.config.search.SearchConfiguration; -import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; 
import com.linkedin.metadata.models.registry.SnapshotEntityRegistry; import com.linkedin.metadata.search.elasticsearch.ElasticSearchService; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; @@ -53,9 +52,6 @@ public abstract class TestEntityTestBase extends AbstractTestNGSpringContextTest @Nonnull protected abstract SearchConfiguration getSearchConfiguration(); - @Nonnull - protected abstract CustomSearchConfiguration getCustomSearchConfiguration(); - private SettingsBuilder settingsBuilder; private ElasticSearchService elasticSearchService; private OperationContext opContext; @@ -102,10 +98,7 @@ private ElasticSearchService buildService() { QueryFilterRewriteChain.EMPTY); ESBrowseDAO browseDAO = new ESBrowseDAO( - getSearchClient(), - getSearchConfiguration(), - getCustomSearchConfiguration(), - QueryFilterRewriteChain.EMPTY); + getSearchClient(), getSearchConfiguration(), null, QueryFilterRewriteChain.EMPTY); ESWriteDAO writeDAO = new ESWriteDAO(getSearchClient(), getBulkProcessor(), 1); ElasticSearchService searchService = new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/GoldenElasticSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/GoldenElasticSearchTest.java index 29f5964c853f13..ad30c9d0229aa6 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/GoldenElasticSearchTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/GoldenElasticSearchTest.java @@ -1,6 +1,6 @@ package com.linkedin.metadata.search.elasticsearch; -import static org.testng.AssertJUnit.assertNotNull; +import static org.testng.Assert.assertNotNull; import com.linkedin.metadata.search.SearchService; import com.linkedin.metadata.search.fixtures.GoldenTestBase; diff --git 
a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/IndexBuilderElasticSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/IndexBuilderElasticSearchTest.java index 911a21767bdeaf..af0b7003bd1d15 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/IndexBuilderElasticSearchTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/IndexBuilderElasticSearchTest.java @@ -1,6 +1,6 @@ package com.linkedin.metadata.search.elasticsearch; -import static org.testng.AssertJUnit.assertNotNull; +import static org.testng.Assert.assertNotNull; import com.linkedin.metadata.search.indexbuilder.IndexBuilderTestBase; import io.datahubproject.test.search.config.SearchTestContainerConfiguration; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/LineageDataFixtureElasticSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/LineageDataFixtureElasticSearchTest.java index 143ae80abc52d3..fe992f61d311d6 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/LineageDataFixtureElasticSearchTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/LineageDataFixtureElasticSearchTest.java @@ -1,5 +1,7 @@ package com.linkedin.metadata.search.elasticsearch; +import static org.testng.Assert.assertNotNull; + import com.linkedin.metadata.search.LineageSearchService; import com.linkedin.metadata.search.SearchService; import com.linkedin.metadata.search.fixtures.LineageDataFixtureTestBase; @@ -10,7 +12,6 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Import; -import org.testng.AssertJUnit; import org.testng.annotations.Test; @Getter @@ -35,6 +36,6 @@ public class LineageDataFixtureElasticSearchTest extends LineageDataFixtureTestB @Test public void 
initTest() { - AssertJUnit.assertNotNull(lineageService); + assertNotNull(lineageService); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/LineageServiceElasticSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/LineageServiceElasticSearchTest.java index 8c4195f9ff5343..7ccf7605432eeb 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/LineageServiceElasticSearchTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/LineageServiceElasticSearchTest.java @@ -1,5 +1,7 @@ package com.linkedin.metadata.search.elasticsearch; +import static org.testng.Assert.assertNotNull; + import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.search.LineageServiceTestBase; @@ -10,8 +12,8 @@ import org.jetbrains.annotations.NotNull; import org.opensearch.client.RestHighLevelClient; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Import; -import org.testng.AssertJUnit; import org.testng.annotations.Test; @Import({ @@ -25,7 +27,10 @@ public class LineageServiceElasticSearchTest extends LineageServiceTestBase { @Autowired private ESBulkProcessor _bulkProcessor; @Autowired private ESIndexBuilder _esIndexBuilder; @Autowired private SearchConfiguration _searchConfiguration; - @Autowired private CustomSearchConfiguration _customSearchConfiguration; + + @Autowired + @Qualifier("defaultTestCustomSearchConfig") + private CustomSearchConfiguration _customSearchConfiguration; @NotNull @Override @@ -51,14 +56,8 @@ protected SearchConfiguration getSearchConfiguration() { return _searchConfiguration; } - @NotNull - @Override - protected CustomSearchConfiguration getCustomSearchConfiguration() { - return _customSearchConfiguration; - } 
- @Test public void initTest() { - AssertJUnit.assertNotNull(_searchClient); + assertNotNull(_searchClient); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SampleDataFixtureElasticSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SampleDataFixtureElasticSearchTest.java index e256f75242a42f..68b68b289dd2c0 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SampleDataFixtureElasticSearchTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SampleDataFixtureElasticSearchTest.java @@ -1,8 +1,9 @@ package com.linkedin.metadata.search.elasticsearch; -import static org.testng.AssertJUnit.assertNotNull; +import static org.testng.Assert.assertNotNull; import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.search.SearchService; import com.linkedin.metadata.search.fixtures.SampleDataFixtureTestBase; import io.datahubproject.metadata.context.OperationContext; @@ -37,6 +38,11 @@ public class SampleDataFixtureElasticSearchTest extends SampleDataFixtureTestBas @Qualifier("sampleDataOperationContext") protected OperationContext operationContext; + @Getter + @Autowired + @Qualifier("fixtureCustomSearchConfig") + protected CustomSearchConfiguration customSearchConfiguration; + @Test public void initTest() { assertNotNull(searchClient); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SearchDAOElasticSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SearchDAOElasticSearchTest.java index a6a8279fe86de2..3fc49a4d624fa5 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SearchDAOElasticSearchTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SearchDAOElasticSearchTest.java @@ -1,8 +1,10 @@ package 
com.linkedin.metadata.search.elasticsearch; -import static org.testng.AssertJUnit.assertNotNull; +import static org.testng.Assert.assertNotNull; import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; +import com.linkedin.metadata.search.elasticsearch.query.ESSearchDAO; import com.linkedin.metadata.search.query.SearchDAOTestBase; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.test.fixtures.search.SampleDataFixtureConfiguration; @@ -28,6 +30,20 @@ public class SearchDAOElasticSearchTest extends SearchDAOTestBase { @Qualifier("sampleDataOperationContext") protected OperationContext operationContext; + @Autowired + @Qualifier("sampleDataEntitySearchService") + protected ElasticSearchService entitySearchService; + + @Getter + @Autowired + @Qualifier("fixtureCustomSearchConfig") + protected CustomSearchConfiguration customSearchConfiguration; + + @Override + protected ESSearchDAO getESSearchDao() { + return entitySearchService.getEsSearchDAO(); + } + @Test public void initTest() { assertNotNull(searchClient); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SearchServiceElasticSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SearchServiceElasticSearchTest.java index 7133971847f982..92dfa18d4feeba 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SearchServiceElasticSearchTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SearchServiceElasticSearchTest.java @@ -1,5 +1,7 @@ package com.linkedin.metadata.search.elasticsearch; +import static org.testng.Assert.assertNotNull; + import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.search.SearchServiceTestBase; @@ -10,8 +12,8 @@ import 
org.jetbrains.annotations.NotNull; import org.opensearch.client.RestHighLevelClient; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Import; -import org.testng.AssertJUnit; import org.testng.annotations.Test; @Import({ @@ -25,7 +27,10 @@ public class SearchServiceElasticSearchTest extends SearchServiceTestBase { @Autowired private ESBulkProcessor _bulkProcessor; @Autowired private ESIndexBuilder _esIndexBuilder; @Autowired private SearchConfiguration _searchConfiguration; - @Autowired private CustomSearchConfiguration _customSearchConfiguration; + + @Autowired + @Qualifier("defaultTestCustomSearchConfig") + private CustomSearchConfiguration _customSearchConfiguration; @NotNull @Override @@ -51,14 +56,8 @@ protected SearchConfiguration getSearchConfiguration() { return _searchConfiguration; } - @NotNull - @Override - protected CustomSearchConfiguration getCustomSearchConfiguration() { - return _customSearchConfiguration; - } - @Test public void initTest() { - AssertJUnit.assertNotNull(_searchClient); + assertNotNull(_searchClient); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SystemMetadataServiceElasticSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SystemMetadataServiceElasticSearchTest.java index a23cd5b051ecbb..b4093459ab3f1b 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SystemMetadataServiceElasticSearchTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SystemMetadataServiceElasticSearchTest.java @@ -1,5 +1,7 @@ package com.linkedin.metadata.search.elasticsearch; +import static org.testng.Assert.assertNotNull; + import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import 
com.linkedin.metadata.systemmetadata.SystemMetadataServiceTestBase; @@ -8,7 +10,6 @@ import org.opensearch.client.RestHighLevelClient; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Import; -import org.testng.AssertJUnit; import org.testng.annotations.Test; @Import({ElasticSearchSuite.class, SearchTestContainerConfiguration.class}) @@ -38,6 +39,6 @@ protected ESIndexBuilder getIndexBuilder() { @Test public void initTest() { - AssertJUnit.assertNotNull(_searchClient); + assertNotNull(_searchClient); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/TestEntityElasticSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/TestEntityElasticSearchTest.java index 5ad7b1218a5bf4..ec6f2e2b3f07a5 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/TestEntityElasticSearchTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/TestEntityElasticSearchTest.java @@ -1,5 +1,7 @@ package com.linkedin.metadata.search.elasticsearch; +import static org.testng.Assert.assertNotNull; + import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.search.TestEntityTestBase; @@ -10,8 +12,8 @@ import org.jetbrains.annotations.NotNull; import org.opensearch.client.RestHighLevelClient; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Import; -import org.testng.AssertJUnit; import org.testng.annotations.Test; @Import({ @@ -25,7 +27,10 @@ public class TestEntityElasticSearchTest extends TestEntityTestBase { @Autowired private ESBulkProcessor bulkProcessor; @Autowired private ESIndexBuilder esIndexBuilder; @Autowired private SearchConfiguration searchConfiguration; - @Autowired 
private CustomSearchConfiguration customSearchConfiguration; + + @Autowired + @Qualifier("defaultTestCustomSearchConfig") + private CustomSearchConfiguration customSearchConfiguration; @NotNull @Override @@ -51,14 +56,8 @@ protected SearchConfiguration getSearchConfiguration() { return searchConfiguration; } - @NotNull - @Override - protected CustomSearchConfiguration getCustomSearchConfiguration() { - return customSearchConfiguration; - } - @Test public void initTest() { - AssertJUnit.assertNotNull(searchClient); + assertNotNull(searchClient); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/TimeseriesAspectServiceElasticSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/TimeseriesAspectServiceElasticSearchTest.java index 1f51d463a2963a..8a5f5d673aa7b9 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/TimeseriesAspectServiceElasticSearchTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/TimeseriesAspectServiceElasticSearchTest.java @@ -1,5 +1,7 @@ package com.linkedin.metadata.search.elasticsearch; +import static org.testng.Assert.assertNotNull; + import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.timeseries.search.TimeseriesAspectServiceTestBase; @@ -9,7 +11,6 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Import; -import org.testng.AssertJUnit; import org.testng.annotations.Test; @Import({ElasticSearchSuite.class, SearchTestContainerConfiguration.class}) @@ -42,6 +43,6 @@ protected ESIndexBuilder getIndexBuilder() { @Test public void initTest() { - AssertJUnit.assertNotNull(_searchClient); + assertNotNull(_searchClient); } } diff --git 
a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/GoldenTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/GoldenTestBase.java index 1ebcc03eb690bc..052daeece8cd0b 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/GoldenTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/GoldenTestBase.java @@ -4,7 +4,6 @@ import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static io.datahubproject.test.search.SearchTestUtils.searchAcrossEntities; import static org.testng.Assert.*; -import static org.testng.AssertJUnit.assertNotNull; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureSetupTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureSetupTest.java deleted file mode 100644 index b908933fcc8e37..00000000000000 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureSetupTest.java +++ /dev/null @@ -1,47 +0,0 @@ -package com.linkedin.metadata.search.fixtures; - -import static org.testng.AssertJUnit.assertEquals; - -import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; -import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; -import java.io.IOException; -import java.io.InputStream; -import java.util.List; -import java.util.Map; -import org.springframework.core.io.ClassPathResource; -import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; -import org.testng.annotations.Test; - -public class SampleDataFixtureSetupTest extends AbstractTestNGSpringContextTests { - private static final String DEFAULT_CONFIG = "search_config.yaml"; - private static final String TEST_FIXTURE_CONFIG = "search_config_fixture_test.yml"; - private static final YAMLMapper MAPPER = new YAMLMapper(); - - /** - * Ensure default search 
configuration matches the test fixture configuration (allowing for some - * differences) - */ - @Test - public void testConfig() throws IOException { - final CustomSearchConfiguration defaultConfig; - final CustomSearchConfiguration fixtureConfig; - - try (InputStream stream = new ClassPathResource(DEFAULT_CONFIG).getInputStream()) { - defaultConfig = MAPPER.readValue(stream, CustomSearchConfiguration.class); - } - try (InputStream stream = new ClassPathResource(TEST_FIXTURE_CONFIG).getInputStream()) { - fixtureConfig = MAPPER.readValue(stream, CustomSearchConfiguration.class); - - // test specifics - ((List>) - fixtureConfig.getQueryConfigurations().get(1).getFunctionScore().get("functions")) - .remove(1); - - ((List>) - fixtureConfig.getQueryConfigurations().get(2).getFunctionScore().get("functions")) - .remove(1); - } - - assertEquals(fixtureConfig, defaultConfig); - } -} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java index 8cb0678180ccbf..bc3c892e07b1bb 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java @@ -12,6 +12,7 @@ import static org.testng.Assert.assertSame; import static org.testng.Assert.assertTrue; +import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.linkedin.common.urn.Urn; @@ -22,6 +23,7 @@ import com.linkedin.datahub.graphql.types.corpuser.CorpUserType; import com.linkedin.datahub.graphql.types.dataset.DatasetType; import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.models.EntitySpec; import 
com.linkedin.metadata.models.SearchableFieldSpec; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -43,6 +45,7 @@ import com.linkedin.r2.RemoteInvocationException; import io.datahubproject.metadata.context.OperationContext; import java.io.IOException; +import java.io.InputStream; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -63,11 +66,13 @@ import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.search.sort.FieldSortBuilder; import org.opensearch.search.sort.SortBuilder; +import org.springframework.core.io.ClassPathResource; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; -import org.testng.AssertJUnit; import org.testng.annotations.Test; public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringContextTests { + public static final String DEFAULT_CONFIG = "search_config.yaml"; + public static final YAMLMapper MAPPER = new YAMLMapper(); @Nonnull protected abstract SearchService getSearchService(); @@ -81,6 +86,9 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont @Nonnull protected abstract OperationContext getOperationContext(); + @Nonnull + protected abstract CustomSearchConfiguration getCustomSearchConfiguration(); + @Test public void testSearchFieldConfig() throws IOException { /* @@ -971,8 +979,13 @@ public void testSmokeTestQueries() { actualCount, expectedCount, String.format( - "Search term `%s` has %s fulltext results, expected %s results.", - key, actualCount, expectedCount)); + "Search term `%s` has %s fulltext results, expected %s results. 
Results: %s", + key, + actualCount, + expectedCount, + value.getEntities().stream() + .map(SearchEntity::getEntity) + .collect(Collectors.toList()))); }); Map expectedStructuredMinimums = @@ -998,8 +1011,13 @@ public void testSmokeTestQueries() { actualCount, expectedCount, String.format( - "Search term `%s` has %s structured results, expected %s results.", - key, actualCount, expectedCount)); + "Search term `%s` has %s structured results, expected %s results. Results: %s", + key, + actualCount, + expectedCount, + value.getEntities().stream() + .map(SearchEntity::getEntity) + .collect(Collectors.toList()))); }); } @@ -1318,6 +1336,7 @@ public void testScrollAcrossEntities() throws IOException { String query = "logging_events"; final int batchSize = 1; int totalResults = 0; + List resultUrns = new ArrayList<>(); String scrollId = null; do { ScrollResult result = @@ -1325,10 +1344,11 @@ public void testScrollAcrossEntities() throws IOException { int numResults = result.hasEntities() ? result.getEntities().size() : 0; assertTrue(numResults <= batchSize); totalResults += numResults; + resultUrns.addAll(result.getEntities().stream().map(SearchEntity::getEntity).toList()); scrollId = result.getScrollId(); } while (scrollId != null); // expect 2 total matching results - assertEquals(totalResults, 2); + assertEquals(totalResults, 2, String.format("query `%s` Results: %s", query, resultUrns)); } @Test @@ -1703,7 +1723,15 @@ public void testOr() { assertTrue( result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), String.format("%s - Expected search results to include matched fields", query)); - assertEquals(result.getEntities().size(), 2); + assertEquals( + result.getEntities().size(), + 2, + String.format( + "Query: `%s` Results: %s", + query, + result.getEntities().stream() + .map(SearchEntity::getEntity) + .collect(Collectors.toList()))); } @Test @@ -1726,7 +1754,15 @@ public void testNegate() { assertTrue( result.getEntities().stream().noneMatch(e 
-> e.getMatchedFields().isEmpty()), String.format("%s - Expected search results to include matched fields", query)); - assertEquals(result.getEntities().size(), 2); + assertEquals( + result.getEntities().size(), + 2, + String.format( + "Query: `%s` Results: %s", + query, + result.getEntities().stream() + .map(SearchEntity::getEntity) + .collect(Collectors.toList()))); } @Test @@ -1896,7 +1932,15 @@ public void testPrefixVsExact() { result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), String.format("%s - Expected search results to include matched fields", query)); - assertEquals(result.getEntities().size(), 2); + assertEquals( + result.getEntities().size(), + 2, + String.format( + "Query: `%s` Results: %s", + query, + result.getEntities().stream() + .map(SearchEntity::getEntity) + .collect(Collectors.toList()))); assertEquals( result.getEntities().get(0).getEntity().toString(), "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)", @@ -1988,7 +2032,7 @@ public void testSortOrdering() { @Test public void testFilterOnHasValuesField() { - AssertJUnit.assertNotNull(getSearchService()); + assertNotNull(getSearchService()); Filter filter = new Filter() .setOr( @@ -2010,7 +2054,7 @@ public void testFilterOnHasValuesField() { @Test public void testFilterOnNumValuesField() { - AssertJUnit.assertNotNull(getSearchService()); + assertNotNull(getSearchService()); Filter filter = new Filter() .setOr( @@ -2030,6 +2074,34 @@ public void testFilterOnNumValuesField() { assertEquals(searchResult.getEntities().size(), 4); } + /** + * Ensure default search configuration matches the test fixture configuration (allowing for some + * differences) + */ + @Test + public void testConfig() throws IOException { + final CustomSearchConfiguration defaultConfig; + try (InputStream stream = new ClassPathResource(DEFAULT_CONFIG).getInputStream()) { + defaultConfig = MAPPER.readValue(stream, CustomSearchConfiguration.class); + } + + final 
CustomSearchConfiguration fixtureConfig = + MAPPER.readValue( + MAPPER.writeValueAsBytes(getCustomSearchConfiguration()), + CustomSearchConfiguration.class); + + // test specifics + ((List>) + fixtureConfig.getQueryConfigurations().get(1).getFunctionScore().get("functions")) + .remove(1); + + ((List>) + fixtureConfig.getQueryConfigurations().get(2).getFunctionScore().get("functions")) + .remove(1); + + assertEquals(fixtureConfig, defaultConfig); + } + private Stream getTokens(AnalyzeRequest request) throws IOException { return getSearchClient() diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/GoldenOpenSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/GoldenOpenSearchTest.java index db39531bba08c6..9e105a69de5d14 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/GoldenOpenSearchTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/GoldenOpenSearchTest.java @@ -1,6 +1,6 @@ package com.linkedin.metadata.search.opensearch; -import static org.testng.AssertJUnit.assertNotNull; +import static org.testng.Assert.assertNotNull; import com.linkedin.metadata.search.SearchService; import com.linkedin.metadata.search.fixtures.GoldenTestBase; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/IndexBuilderOpenSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/IndexBuilderOpenSearchTest.java index ef1ed51eb47991..01a9fc84c83a6f 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/IndexBuilderOpenSearchTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/IndexBuilderOpenSearchTest.java @@ -1,6 +1,6 @@ package com.linkedin.metadata.search.opensearch; -import static org.testng.AssertJUnit.assertNotNull; +import static org.testng.Assert.assertNotNull; import com.linkedin.metadata.search.indexbuilder.IndexBuilderTestBase; import 
io.datahubproject.test.search.config.SearchTestContainerConfiguration; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/LineageDataFixtureOpenSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/LineageDataFixtureOpenSearchTest.java index 98ac4013443524..ec1c485cc0f551 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/LineageDataFixtureOpenSearchTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/LineageDataFixtureOpenSearchTest.java @@ -1,5 +1,7 @@ package com.linkedin.metadata.search.opensearch; +import static org.testng.Assert.assertNotNull; + import com.linkedin.metadata.search.LineageSearchService; import com.linkedin.metadata.search.SearchService; import com.linkedin.metadata.search.fixtures.LineageDataFixtureTestBase; @@ -10,7 +12,6 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Import; -import org.testng.AssertJUnit; import org.testng.annotations.Test; @Getter @@ -35,6 +36,6 @@ public class LineageDataFixtureOpenSearchTest extends LineageDataFixtureTestBase @Test public void initTest() { - AssertJUnit.assertNotNull(lineageService); + assertNotNull(lineageService); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/LineageServiceOpenSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/LineageServiceOpenSearchTest.java index 26c2cf28cdecad..d24501d118925e 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/LineageServiceOpenSearchTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/LineageServiceOpenSearchTest.java @@ -1,5 +1,7 @@ package com.linkedin.metadata.search.opensearch; +import static org.testng.Assert.assertNotNull; + import com.linkedin.metadata.config.search.SearchConfiguration; import 
com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.search.LineageServiceTestBase; @@ -10,8 +12,8 @@ import org.jetbrains.annotations.NotNull; import org.opensearch.client.RestHighLevelClient; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Import; -import org.testng.AssertJUnit; import org.testng.annotations.Test; @Import({ @@ -25,7 +27,10 @@ public class LineageServiceOpenSearchTest extends LineageServiceTestBase { @Autowired private ESBulkProcessor _bulkProcessor; @Autowired private ESIndexBuilder _esIndexBuilder; @Autowired private SearchConfiguration _searchConfiguration; - @Autowired private CustomSearchConfiguration _customSearchConfiguration; + + @Autowired + @Qualifier("defaultTestCustomSearchConfig") + private CustomSearchConfiguration _customSearchConfiguration; @NotNull @Override @@ -51,14 +56,8 @@ protected SearchConfiguration getSearchConfiguration() { return _searchConfiguration; } - @NotNull - @Override - protected CustomSearchConfiguration getCustomSearchConfiguration() { - return _customSearchConfiguration; - } - @Test public void initTest() { - AssertJUnit.assertNotNull(_searchClient); + assertNotNull(_searchClient); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SampleDataFixtureOpenSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SampleDataFixtureOpenSearchTest.java index 5d47e6ffd6fa5c..4bf8465b14ac9d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SampleDataFixtureOpenSearchTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SampleDataFixtureOpenSearchTest.java @@ -1,8 +1,9 @@ package com.linkedin.metadata.search.opensearch; -import static org.testng.AssertJUnit.assertNotNull; +import static org.testng.Assert.assertNotNull; import 
com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.search.SearchService; import com.linkedin.metadata.search.fixtures.SampleDataFixtureTestBase; import io.datahubproject.metadata.context.OperationContext; @@ -37,6 +38,11 @@ public class SampleDataFixtureOpenSearchTest extends SampleDataFixtureTestBase { @Qualifier("sampleDataOperationContext") protected OperationContext operationContext; + @Getter + @Autowired + @Qualifier("fixtureCustomSearchConfig") + protected CustomSearchConfiguration customSearchConfiguration; + @Test public void initTest() { assertNotNull(searchClient); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SearchDAOOpenSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SearchDAOOpenSearchTest.java index a3a767807d7b9d..1512e146948ac7 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SearchDAOOpenSearchTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SearchDAOOpenSearchTest.java @@ -1,8 +1,11 @@ package com.linkedin.metadata.search.opensearch; -import static org.testng.AssertJUnit.assertNotNull; +import static org.testng.Assert.assertNotNull; import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; +import com.linkedin.metadata.search.elasticsearch.ElasticSearchService; +import com.linkedin.metadata.search.elasticsearch.query.ESSearchDAO; import com.linkedin.metadata.search.query.SearchDAOTestBase; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.test.fixtures.search.SampleDataFixtureConfiguration; @@ -28,6 +31,20 @@ public class SearchDAOOpenSearchTest extends SearchDAOTestBase { @Qualifier("sampleDataOperationContext") protected OperationContext operationContext; + @Autowired + 
@Qualifier("sampleDataEntitySearchService") + protected ElasticSearchService entitySearchService; + + @Getter + @Autowired + @Qualifier("fixtureCustomSearchConfig") + protected CustomSearchConfiguration customSearchConfiguration; + + @Override + protected ESSearchDAO getESSearchDao() { + return entitySearchService.getEsSearchDAO(); + } + @Test public void initTest() { assertNotNull(searchClient); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SearchServiceOpenSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SearchServiceOpenSearchTest.java index 1127ba2089a91b..ab1137c94f2f43 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SearchServiceOpenSearchTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SearchServiceOpenSearchTest.java @@ -1,5 +1,7 @@ package com.linkedin.metadata.search.opensearch; +import static org.testng.Assert.assertNotNull; + import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.search.SearchServiceTestBase; @@ -10,8 +12,8 @@ import org.jetbrains.annotations.NotNull; import org.opensearch.client.RestHighLevelClient; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Import; -import org.testng.AssertJUnit; import org.testng.annotations.Test; @Import({ @@ -25,7 +27,10 @@ public class SearchServiceOpenSearchTest extends SearchServiceTestBase { @Autowired private ESBulkProcessor _bulkProcessor; @Autowired private ESIndexBuilder _esIndexBuilder; @Autowired private SearchConfiguration _searchConfiguration; - @Autowired private CustomSearchConfiguration _customSearchConfiguration; + + @Autowired + @Qualifier("defaultTestCustomSearchConfig") + private CustomSearchConfiguration 
_customSearchConfiguration; @NotNull @Override @@ -51,14 +56,8 @@ protected SearchConfiguration getSearchConfiguration() { return _searchConfiguration; } - @NotNull - @Override - protected CustomSearchConfiguration getCustomSearchConfiguration() { - return _customSearchConfiguration; - } - @Test public void initTest() { - AssertJUnit.assertNotNull(_searchClient); + assertNotNull(_searchClient); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SystemMetadataServiceOpenSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SystemMetadataServiceOpenSearchTest.java index 7ba90319cf1d3e..46bf30fcedafb8 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SystemMetadataServiceOpenSearchTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SystemMetadataServiceOpenSearchTest.java @@ -1,5 +1,7 @@ package com.linkedin.metadata.search.opensearch; +import static org.testng.Assert.assertNotNull; + import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.systemmetadata.SystemMetadataServiceTestBase; @@ -8,7 +10,6 @@ import org.opensearch.client.RestHighLevelClient; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Import; -import org.testng.AssertJUnit; import org.testng.annotations.Test; @Import({OpenSearchSuite.class, SearchTestContainerConfiguration.class}) @@ -38,6 +39,6 @@ protected ESIndexBuilder getIndexBuilder() { @Test public void initTest() { - AssertJUnit.assertNotNull(_searchClient); + assertNotNull(_searchClient); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/TestEntityOpenSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/TestEntityOpenSearchTest.java index 80db8864014c32..96adf052cde456 100644 --- 
a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/TestEntityOpenSearchTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/TestEntityOpenSearchTest.java @@ -1,5 +1,7 @@ package com.linkedin.metadata.search.opensearch; +import static org.testng.Assert.assertNotNull; + import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.search.TestEntityTestBase; @@ -10,8 +12,8 @@ import org.jetbrains.annotations.NotNull; import org.opensearch.client.RestHighLevelClient; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Import; -import org.testng.AssertJUnit; import org.testng.annotations.Test; @Import({ @@ -25,7 +27,10 @@ public class TestEntityOpenSearchTest extends TestEntityTestBase { @Autowired private ESBulkProcessor _bulkProcessor; @Autowired private ESIndexBuilder _esIndexBuilder; @Autowired private SearchConfiguration _searchConfiguration; - @Autowired private CustomSearchConfiguration _customSearchConfiguration; + + @Autowired + @Qualifier("defaultTestCustomSearchConfig") + private CustomSearchConfiguration _customSearchConfiguration; @NotNull @Override @@ -51,14 +56,8 @@ protected SearchConfiguration getSearchConfiguration() { return _searchConfiguration; } - @NotNull - @Override - protected CustomSearchConfiguration getCustomSearchConfiguration() { - return _customSearchConfiguration; - } - @Test public void initTest() { - AssertJUnit.assertNotNull(_searchClient); + assertNotNull(_searchClient); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/TimeseriesAspectServiceOpenSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/TimeseriesAspectServiceOpenSearchTest.java index 16ac03415ee5c2..b60ba08d9785bf 100644 --- 
a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/TimeseriesAspectServiceOpenSearchTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/TimeseriesAspectServiceOpenSearchTest.java @@ -1,5 +1,7 @@ package com.linkedin.metadata.search.opensearch; +import static org.testng.Assert.assertNotNull; + import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.timeseries.search.TimeseriesAspectServiceTestBase; @@ -9,7 +11,6 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Import; -import org.testng.AssertJUnit; import org.testng.annotations.Test; @Import({OpenSearchSuite.class, SearchTestContainerConfiguration.class}) @@ -42,6 +43,6 @@ protected ESIndexBuilder getIndexBuilder() { @Test public void initTest() { - AssertJUnit.assertNotNull(_searchClient); + assertNotNull(_searchClient); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java index 9c3d515f9322fb..e71865921678bb 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java @@ -27,6 +27,7 @@ import org.opensearch.search.SearchHit; import org.opensearch.search.SearchHits; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Import; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; import org.testng.annotations.BeforeMethod; @@ -39,7 +40,10 @@ public class BrowseDAOTest extends AbstractTestNGSpringContextTests { private OperationContext 
opContext; @Autowired private SearchConfiguration searchConfiguration; - @Autowired private CustomSearchConfiguration customSearchConfiguration; + + @Autowired + @Qualifier("defaultTestCustomSearchConfig") + private CustomSearchConfiguration customSearchConfiguration; @BeforeMethod public void setup() throws RemoteInvocationException, URISyntaxException { diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/SearchDAOTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/SearchDAOTestBase.java index eafe5c7b5c3103..6779b8f3d825c4 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/SearchDAOTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/SearchDAOTestBase.java @@ -1,6 +1,8 @@ package com.linkedin.metadata.search.query; import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.search.fixtures.SampleDataFixtureTestBase.DEFAULT_CONFIG; +import static com.linkedin.metadata.search.fixtures.SampleDataFixtureTestBase.MAPPER; import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static com.linkedin.metadata.utils.SearchUtil.AGGREGATION_SEPARATOR_CHAR; import static com.linkedin.metadata.utils.SearchUtil.ES_INDEX_FIELD; @@ -12,6 +14,7 @@ import com.linkedin.data.template.LongMap; import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; @@ -27,15 +30,17 @@ import com.linkedin.metadata.search.elasticsearch.ElasticSearchService; import com.linkedin.metadata.search.elasticsearch.query.ESSearchDAO; import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; -import com.linkedin.metadata.search.opensearch.SearchDAOOpenSearchTest; import 
com.linkedin.metadata.utils.SearchUtil; import io.datahubproject.metadata.context.OperationContext; +import java.io.IOException; +import java.io.InputStream; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map; import org.opensearch.action.explain.ExplainResponse; import org.opensearch.client.RestHighLevelClient; +import org.springframework.core.io.ClassPathResource; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; import org.testng.annotations.Test; @@ -47,6 +52,10 @@ public abstract class SearchDAOTestBase extends AbstractTestNGSpringContextTests protected abstract OperationContext getOperationContext(); + protected abstract ESSearchDAO getESSearchDao(); + + protected abstract CustomSearchConfiguration getCustomSearchConfiguration(); + @Test public void testTransformFilterForEntitiesNoChange() { Criterion c = @@ -413,30 +422,21 @@ public void testTransformIndexIntoEntityNameNested() { @Test public void testExplain() { - ESSearchDAO searchDAO = - new ESSearchDAO( - getSearchClient(), - false, - this instanceof SearchDAOOpenSearchTest - ? ELASTICSEARCH_IMPLEMENTATION_OPENSEARCH - : ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, - getSearchConfiguration(), - null, - QueryFilterRewriteChain.EMPTY); ExplainResponse explainResponse = - searchDAO.explain( - getOperationContext() - .withSearchFlags(flags -> ElasticSearchService.DEFAULT_SERVICE_SEARCH_FLAGS), - "*", - "urn:li:dataset:(urn:li:dataPlatform:bigquery,bigquery-public-data.covid19_geotab_mobility_impact." - + "ca_border_wait_times,PROD)", - DATASET_ENTITY_NAME, - null, - null, - null, - null, - 10, - null); + getESSearchDao() + .explain( + getOperationContext() + .withSearchFlags(flags -> ElasticSearchService.DEFAULT_SERVICE_SEARCH_FLAGS), + "*", + "urn:li:dataset:(urn:li:dataPlatform:bigquery,bigquery-public-data.covid19_geotab_mobility_impact." 
+ + "ca_border_wait_times,PROD)", + DATASET_ENTITY_NAME, + null, + null, + null, + null, + 10, + null); assertNotNull(explainResponse); assertEquals(explainResponse.getIndex(), "smpldat_datasetindex_v2"); @@ -444,6 +444,34 @@ public void testExplain() { explainResponse.getId(), "urn:li:dataset:(urn:li:dataPlatform:bigquery,bigquery-public-data.covid19_geotab_mobility_impact.ca_border_wait_times,PROD)"); assertTrue(explainResponse.isExists()); - assertEquals(explainResponse.getExplanation().getValue(), 18.0f); + assertEquals(explainResponse.getExplanation().getValue(), 1.25f); + } + + /** + * Ensure default search configuration matches the test fixture configuration (allowing for some + * differences) + */ + @Test + public void testConfig() throws IOException { + final CustomSearchConfiguration defaultConfig; + try (InputStream stream = new ClassPathResource(DEFAULT_CONFIG).getInputStream()) { + defaultConfig = MAPPER.readValue(stream, CustomSearchConfiguration.class); + } + + final CustomSearchConfiguration fixtureConfig = + MAPPER.readValue( + MAPPER.writeValueAsBytes(getCustomSearchConfiguration()), + CustomSearchConfiguration.class); + + // test specifics + ((List>) + fixtureConfig.getQueryConfigurations().get(1).getFunctionScore().get("functions")) + .remove(1); + + ((List>) + fixtureConfig.getQueryConfigurations().get(2).getFunctionScore().get("functions")) + .remove(1); + + assertEquals(fixtureConfig, defaultConfig); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/ContainerExpansionRewriterTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/ContainerExpansionRewriterTest.java index f91e3a28f1bd69..5246e4dbe5bf92 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/ContainerExpansionRewriterTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/ContainerExpansionRewriterTest.java @@ -311,7 +311,7 @@ public void testNestedBoolQueryRewrite() 
{ new RelatedEntities( "IsPartOf", childUrn, parentUrn, RelationshipDirection.OUTGOING, null)))); - BoolQueryBuilder testQuery = QueryBuilders.boolQuery(); + BoolQueryBuilder testQuery = QueryBuilders.boolQuery().minimumShouldMatch(1); testQuery.filter( QueryBuilders.boolQuery() .filter( @@ -319,8 +319,11 @@ public void testNestedBoolQueryRewrite() { testQuery.filter(QueryBuilders.existsQuery("someField")); testQuery.should( QueryBuilders.boolQuery() + .minimumShouldMatch(1) .should( - QueryBuilders.boolQuery().should(QueryBuilders.termsQuery(FIELD_NAME, childUrn)))); + QueryBuilders.boolQuery() + .minimumShouldMatch(1) + .should(QueryBuilders.termsQuery(FIELD_NAME, childUrn)))); testQuery.should(QueryBuilders.existsQuery("someField")); testQuery.must( QueryBuilders.boolQuery() @@ -332,7 +335,7 @@ public void testNestedBoolQueryRewrite() { QueryBuilders.boolQuery().mustNot(QueryBuilders.termsQuery(FIELD_NAME, childUrn)))); testQuery.mustNot(QueryBuilders.existsQuery("someField")); - BoolQueryBuilder expectedRewrite = QueryBuilders.boolQuery(); + BoolQueryBuilder expectedRewrite = QueryBuilders.boolQuery().minimumShouldMatch(1); expectedRewrite.filter( QueryBuilders.boolQuery() .filter( @@ -341,8 +344,10 @@ public void testNestedBoolQueryRewrite() { expectedRewrite.filter(QueryBuilders.existsQuery("someField")); expectedRewrite.should( QueryBuilders.boolQuery() + .minimumShouldMatch(1) .should( QueryBuilders.boolQuery() + .minimumShouldMatch(1) .should(QueryBuilders.termsQuery(FIELD_NAME, childUrn, parentUrn)))); expectedRewrite.should(QueryBuilders.existsQuery("someField")); expectedRewrite.must( diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/DomainExpansionRewriterTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/DomainExpansionRewriterTest.java index 76e650f4054566..edc6449438581f 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/DomainExpansionRewriterTest.java +++ 
b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/DomainExpansionRewriterTest.java @@ -312,7 +312,7 @@ public void testNestedBoolQueryRewrite() { new RelatedEntities( "IsPartOf", childUrn, parentUrn, RelationshipDirection.INCOMING, null)))); - BoolQueryBuilder testQuery = QueryBuilders.boolQuery(); + BoolQueryBuilder testQuery = QueryBuilders.boolQuery().minimumShouldMatch(1); testQuery.filter( QueryBuilders.boolQuery() .filter( @@ -320,9 +320,15 @@ public void testNestedBoolQueryRewrite() { testQuery.filter(QueryBuilders.boolQuery().filter(QueryBuilders.existsQuery("someField"))); testQuery.should( QueryBuilders.boolQuery() + .minimumShouldMatch(1) .should( - QueryBuilders.boolQuery().should(QueryBuilders.termsQuery(FIELD_NAME, parentUrn)))); - testQuery.should(QueryBuilders.boolQuery().should(QueryBuilders.existsQuery("someField"))); + QueryBuilders.boolQuery() + .minimumShouldMatch(1) + .should(QueryBuilders.termsQuery(FIELD_NAME, parentUrn)))); + testQuery.should( + QueryBuilders.boolQuery() + .minimumShouldMatch(1) + .should(QueryBuilders.existsQuery("someField"))); testQuery.must( QueryBuilders.boolQuery() .must(QueryBuilders.boolQuery().must(QueryBuilders.termsQuery(FIELD_NAME, parentUrn)))); @@ -334,7 +340,7 @@ public void testNestedBoolQueryRewrite() { .mustNot(QueryBuilders.termsQuery(FIELD_NAME, parentUrn)))); testQuery.mustNot(QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery("someField"))); - BoolQueryBuilder expectedRewrite = QueryBuilders.boolQuery(); + BoolQueryBuilder expectedRewrite = QueryBuilders.boolQuery().minimumShouldMatch(1); expectedRewrite.filter( QueryBuilders.boolQuery() .filter( @@ -344,11 +350,15 @@ public void testNestedBoolQueryRewrite() { QueryBuilders.boolQuery().filter(QueryBuilders.existsQuery("someField"))); expectedRewrite.should( QueryBuilders.boolQuery() + .minimumShouldMatch(1) .should( QueryBuilders.boolQuery() + .minimumShouldMatch(1) .should(QueryBuilders.termsQuery(FIELD_NAME, childUrn, 
parentUrn)))); expectedRewrite.should( - QueryBuilders.boolQuery().should(QueryBuilders.existsQuery("someField"))); + QueryBuilders.boolQuery() + .minimumShouldMatch(1) + .should(QueryBuilders.existsQuery("someField"))); expectedRewrite.must( QueryBuilders.boolQuery() .must( diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java index 0ea2340ae82173..1381e9560b7e53 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java @@ -41,7 +41,7 @@ public class AggregationQueryBuilderTest { private static AspectRetriever aspectRetrieverV1; @BeforeClass - public static void setup() throws RemoteInvocationException, URISyntaxException { + public void setup() throws RemoteInvocationException, URISyntaxException { Urn helloUrn = Urn.createFromString("urn:li:structuredProperty:hello"); Urn abFghTenUrn = Urn.createFromString("urn:li:structuredProperty:ab.fgh.ten"); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchQueryBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchQueryBuilderTest.java index 8d83317449a1ea..374a69ee9a5536 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchQueryBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchQueryBuilderTest.java @@ -61,6 +61,10 @@ public class SearchQueryBuilderTest extends AbstractTestNGSpringContextTests { @Qualifier("queryOperationContext") private OperationContext operationContext; + @Autowired + @Qualifier("defaultTestCustomSearchConfig") + private CustomSearchConfiguration customSearchConfiguration; + public static SearchConfiguration 
testQueryConfig; static { diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java index a90c0291f53b8f..a3ef62760d7972 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java @@ -1,5 +1,6 @@ package com.linkedin.metadata.search.query.request; +import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.SEARCHABLE_ENTITY_TYPES; import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static com.linkedin.metadata.utils.CriterionUtils.buildExistsCriterion; import static com.linkedin.metadata.utils.CriterionUtils.buildIsNullCriterion; @@ -8,7 +9,10 @@ import static org.testng.Assert.*; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.linkedin.data.template.StringArray; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper; import com.linkedin.metadata.TestEntitySpecBuilder; import com.linkedin.metadata.config.search.ExactMatchConfiguration; import com.linkedin.metadata.config.search.PartialConfiguration; @@ -35,6 +39,7 @@ import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; +import java.util.stream.Stream; import org.opensearch.action.search.SearchRequest; import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.ExistsQueryBuilder; @@ -628,6 +633,116 @@ public void testBrowsePathQueryFilter() { assertEquals(((ExistsQueryBuilder) mustHaveV1.must().get(0)).fieldName(), "browsePaths"); } + @Test + public void testQueryByDefault() { + final Set COMMON = + Set.of( + "container", + "fieldDescriptions", + "description", + "platform", + 
"fieldPaths", + "editedFieldGlossaryTerms", + "editedFieldDescriptions", + "fieldTags", + "id", + "editedDescription", + "qualifiedName", + "domains", + "platformInstance", + "tags", + "urn", + "customProperties", + "fieldGlossaryTerms", + "editedName", + "name", + "fieldLabels", + "glossaryTerms", + "editedFieldTags", + "displayName", + "title"); + + Map> expectedQueryByDefault = + ImmutableMap.>builder() + .put( + EntityType.DASHBOARD, + Stream.concat(COMMON.stream(), Stream.of("tool")).collect(Collectors.toSet())) + .put( + EntityType.CHART, + Stream.concat(COMMON.stream(), Stream.of("tool")).collect(Collectors.toSet())) + .put( + EntityType.MLMODEL, + Stream.concat(COMMON.stream(), Stream.of("type")).collect(Collectors.toSet())) + .put( + EntityType.MLFEATURE_TABLE, + Stream.concat(COMMON.stream(), Stream.of("features", "primaryKeys")) + .collect(Collectors.toSet())) + .put( + EntityType.MLFEATURE, + Stream.concat(COMMON.stream(), Stream.of("featureNamespace")) + .collect(Collectors.toSet())) + .put( + EntityType.MLPRIMARY_KEY, + Stream.concat(COMMON.stream(), Stream.of("featureNamespace")) + .collect(Collectors.toSet())) + .put( + EntityType.DATA_FLOW, + Stream.concat(COMMON.stream(), Stream.of("cluster", "orchestrator", "flowId")) + .collect(Collectors.toSet())) + .put( + EntityType.DATA_JOB, + Stream.concat(COMMON.stream(), Stream.of("jobId")).collect(Collectors.toSet())) + .put( + EntityType.GLOSSARY_TERM, + Stream.concat( + COMMON.stream(), + Stream.of("values", "parentNode", "relatedTerms", "definition")) + .collect(Collectors.toSet())) + .put( + EntityType.GLOSSARY_NODE, + Stream.concat(COMMON.stream(), Stream.of("definition", "parentNode")) + .collect(Collectors.toSet())) + .put( + EntityType.CORP_USER, + Stream.concat( + COMMON.stream(), Stream.of("skills", "teams", "ldap", "fullName", "email")) + .collect(Collectors.toSet())) + .put( + EntityType.DOMAIN, + Stream.concat(COMMON.stream(), Stream.of("parentDomain")) + .collect(Collectors.toSet())) + 
.put( + EntityType.SCHEMA_FIELD, + Stream.concat(COMMON.stream(), Stream.of("schemaFieldAliases", "parent")) + .collect(Collectors.toSet())) + .build(); + + for (EntityType entityType : SEARCHABLE_ENTITY_TYPES) { + Set expectedEntityQueryByDefault = + expectedQueryByDefault.getOrDefault(entityType, COMMON); + assertFalse(expectedEntityQueryByDefault.isEmpty()); + + EntitySpec entitySpec = + operationContext.getEntityRegistry().getEntitySpec(EntityTypeMapper.getName(entityType)); + SearchRequestHandler handler = + SearchRequestHandler.getBuilder( + operationContext.getEntityRegistry(), + entitySpec, + testQueryConfig, + null, + QueryFilterRewriteChain.EMPTY); + + Set unexpected = new HashSet<>(handler.getDefaultQueryFieldNames()); + unexpected.removeAll(expectedEntityQueryByDefault); + + assertTrue( + unexpected.isEmpty(), + String.format( + "Consider whether these field(s) for entity %s should be included for general search. Fields: %s If yes, please update the test expectations. If no, please annotate the PDL model with \"queryByDefault\": false", + entityType, unexpected)); + } + } + private BoolQueryBuilder getQuery(final Criterion filterCriterion) { final Filter filter = new Filter() diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java index ef000b01a64e55..2c5bcd1294fa15 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java @@ -15,7 +15,10 @@ import com.fasterxml.jackson.databind.node.JsonNodeType; import com.fasterxml.jackson.databind.node.ObjectNode; import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.DataMapBuilder; +import com.linkedin.dataset.DatasetProperties; +import 
com.linkedin.dataset.EditableDatasetProperties; import com.linkedin.entity.Aspect; import com.linkedin.metadata.TestEntitySpecBuilder; import com.linkedin.metadata.TestEntityUtil; @@ -39,6 +42,17 @@ public class SearchDocumentTransformerTest { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final EntityRegistry ENTITY_REGISTRY = + TestOperationContexts.defaultEntityRegistry(); + private static final EntityRegistry TEST_ENTITY_REGISTRY; + + static { + TEST_ENTITY_REGISTRY = + new ConfigEntityRegistry( + TestSearchFieldConfig.class + .getClassLoader() + .getResourceAsStream("test-entity-registry.yaml")); + } static { int maxSize = @@ -214,7 +228,6 @@ public void testSetSearchableRefValue() throws URISyntaxException, RemoteInvocat SearchDocumentTransformer searchDocumentTransformer = new SearchDocumentTransformer(1000, 1000, 1000); - EntityRegistry entityRegistry = getTestEntityRegistry(); List urnList = List.of(Urn.createFromString("urn:li:refEntity:1")); DataMapBuilder dataMapBuilder = new DataMapBuilder(); @@ -225,10 +238,10 @@ public void testSetSearchableRefValue() throws URISyntaxException, RemoteInvocat ObjectNode searchDocument = JsonNodeFactory.instance.objectNode(); SearchableRefFieldSpec searchableRefFieldSpec = - entityRegistry.getEntitySpec("testRefEntity").getSearchableRefFieldSpecs().get(0); + TEST_ENTITY_REGISTRY.getEntitySpec("testRefEntity").getSearchableRefFieldSpecs().get(0); // Mock Behaviour - Mockito.when(aspectRetriever.getEntityRegistry()).thenReturn(entityRegistry); + Mockito.when(aspectRetriever.getEntityRegistry()).thenReturn(TEST_ENTITY_REGISTRY); Mockito.when(aspectRetriever.getLatestAspectObject(any(), anyString())).thenReturn(aspect); OperationContext opContext = TestOperationContexts.systemContextNoSearchAuthorization( @@ -258,14 +271,13 @@ public void testSetSearchableRefValue_WithNonURNField() throws URISyntaxExceptio SearchDocumentTransformer searchDocumentTransformer = new 
SearchDocumentTransformer(1000, 1000, 1000); - EntityRegistry entityRegistry = getTestEntityRegistry(); OperationContext opContext = - TestOperationContexts.systemContextNoSearchAuthorization(entityRegistry); + TestOperationContexts.systemContextNoSearchAuthorization(TEST_ENTITY_REGISTRY); List urnList = List.of(Urn.createFromString("urn:li:refEntity:1")); ObjectNode searchDocument = JsonNodeFactory.instance.objectNode(); SearchableRefFieldSpec searchableRefFieldSpecText = - entityRegistry.getEntitySpec("testRefEntity").getSearchableRefFieldSpecs().get(1); + TEST_ENTITY_REGISTRY.getEntitySpec("testRefEntity").getSearchableRefFieldSpecs().get(1); searchDocumentTransformer.setSearchableRefValue( opContext, searchableRefFieldSpecText, urnList, searchDocument, false); assertTrue(searchDocument.isEmpty()); @@ -278,10 +290,9 @@ public void testSetSearchableRefValue_RuntimeException() SearchDocumentTransformer searchDocumentTransformer = new SearchDocumentTransformer(1000, 1000, 1000); - EntityRegistry entityRegistry = getTestEntityRegistry(); List urnList = List.of(Urn.createFromString("urn:li:refEntity:1")); - Mockito.when(aspectRetriever.getEntityRegistry()).thenReturn(entityRegistry); + Mockito.when(aspectRetriever.getEntityRegistry()).thenReturn(TEST_ENTITY_REGISTRY); Mockito.when( aspectRetriever.getLatestAspectObject( eq(Urn.createFromString("urn:li:refEntity:1")), anyString())) @@ -296,7 +307,7 @@ public void testSetSearchableRefValue_RuntimeException() ObjectNode searchDocument = JsonNodeFactory.instance.objectNode(); SearchableRefFieldSpec searchableRefFieldSpec = - entityRegistry.getEntitySpec("testRefEntity").getSearchableRefFieldSpecs().get(0); + TEST_ENTITY_REGISTRY.getEntitySpec("testRefEntity").getSearchableRefFieldSpecs().get(0); searchDocumentTransformer.setSearchableRefValue( opContext, searchableRefFieldSpec, urnList, searchDocument, false); assertTrue(searchDocument.isEmpty()); @@ -309,7 +320,6 @@ public void 
testSetSearchableRefValue_RuntimeException_URNExist() SearchDocumentTransformer searchDocumentTransformer = new SearchDocumentTransformer(1000, 1000, 1000); - EntityRegistry entityRegistry = getTestEntityRegistry(); List urnList = List.of(Urn.createFromString("urn:li:refEntity:1")); DataMapBuilder dataMapBuilder = new DataMapBuilder(); dataMapBuilder.addKVPair("fieldPath", "refEntityUrn"); @@ -317,7 +327,7 @@ public void testSetSearchableRefValue_RuntimeException_URNExist() dataMapBuilder.addKVPair("description", "refEntityUrn1 description details"); Aspect aspect = new Aspect(dataMapBuilder.convertToDataMap()); - Mockito.when(aspectRetriever.getEntityRegistry()).thenReturn(entityRegistry); + Mockito.when(aspectRetriever.getEntityRegistry()).thenReturn(TEST_ENTITY_REGISTRY); Mockito.when( aspectRetriever.getLatestAspectObject( eq(Urn.createFromString("urn:li:refEntity:1")), anyString())) @@ -333,7 +343,7 @@ public void testSetSearchableRefValue_RuntimeException_URNExist() ObjectNode searchDocument = JsonNodeFactory.instance.objectNode(); SearchableRefFieldSpec searchableRefFieldSpec = - entityRegistry.getEntitySpec("testRefEntity").getSearchableRefFieldSpecs().get(0); + TEST_ENTITY_REGISTRY.getEntitySpec("testRefEntity").getSearchableRefFieldSpecs().get(0); searchDocumentTransformer.setSearchableRefValue( opContext, searchableRefFieldSpec, urnList, searchDocument, false); assertTrue(searchDocument.has("refEntityUrns")); @@ -349,13 +359,12 @@ void testSetSearchableRefValue_WithInvalidURN() SearchDocumentTransformer searchDocumentTransformer = new SearchDocumentTransformer(1000, 1000, 1000); - EntityRegistry entityRegistry = getTestEntityRegistry(); List urnList = List.of(Urn.createFromString("urn:li:refEntity:1")); - Mockito.when(aspectRetriever.getEntityRegistry()).thenReturn(entityRegistry); + Mockito.when(aspectRetriever.getEntityRegistry()).thenReturn(TEST_ENTITY_REGISTRY); Mockito.when(aspectRetriever.getLatestAspectObject(any(), anyString())).thenReturn(null); 
SearchableRefFieldSpec searchableRefFieldSpec = - entityRegistry.getEntitySpec("testRefEntity").getSearchableRefFieldSpecs().get(0); + TEST_ENTITY_REGISTRY.getEntitySpec("testRefEntity").getSearchableRefFieldSpecs().get(0); OperationContext opContext = TestOperationContexts.systemContextNoSearchAuthorization( RetrieverContext.builder() @@ -371,10 +380,42 @@ void testSetSearchableRefValue_WithInvalidURN() assertTrue(searchDocument.get("refEntityUrns").getNodeType().equals(JsonNodeType.NULL)); } - private EntityRegistry getTestEntityRegistry() { - return new ConfigEntityRegistry( - TestSearchFieldConfig.class - .getClassLoader() - .getResourceAsStream("test-entity-registry.yaml")); + @Test + public void testEmptyDescription() throws RemoteInvocationException, URISyntaxException { + String entityUrn = "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)"; + SearchDocumentTransformer test = new SearchDocumentTransformer(1000, 1000, 1000); + + // editedDescription - empty string + Optional transformed = + test.transformAspect( + mock(OperationContext.class), + UrnUtils.getUrn(entityUrn), + new EditableDatasetProperties().setDescription(""), + ENTITY_REGISTRY + .getEntitySpec(DATASET_ENTITY_NAME) + .getAspectSpec(EDITABLE_DATASET_PROPERTIES_ASPECT_NAME), + false); + + assertTrue(transformed.isPresent()); + assertEquals(transformed.get().get("urn").asText(), entityUrn); + assertTrue(transformed.get().has("editedDescription")); + assertTrue(transformed.get().get("editedDescription").isNull()); + + // description - empty string + transformed = + test.transformAspect( + mock(OperationContext.class), + UrnUtils.getUrn(entityUrn), + new DatasetProperties().setDescription(""), + ENTITY_REGISTRY + .getEntitySpec(DATASET_ENTITY_NAME) + .getAspectSpec(DATASET_PROPERTIES_ASPECT_NAME), + false); + + assertTrue(transformed.isPresent()); + assertEquals(transformed.get().get("urn").asText(), entityUrn); + assertTrue(transformed.get().has("description")); + 
assertTrue(transformed.get().get("description").isNull()); + assertFalse(transformed.get().get("hasDescription").asBoolean()); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java index c5f9986284627d..8d06594e415e08 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java @@ -38,7 +38,7 @@ public class ESUtilsTest { private static AspectRetriever aspectRetrieverV1; @BeforeClass - public static void setup() throws RemoteInvocationException, URISyntaxException { + public void setup() throws RemoteInvocationException, URISyntaxException { Urn abFghTenUrn = Urn.createFromString("urn:li:structuredProperty:ab.fgh.ten"); // legacy @@ -101,6 +101,7 @@ public void testGetQueryBuilderFromCriterionEqualsValues() { + " \"_name\" : \"myTestField\"\n" + " }\n" + "}"; + Assert.assertEquals(result.toString(), expected); final Criterion multiValueCriterion = @@ -150,6 +151,85 @@ public void testGetQueryBuilderFromCriterionEqualsValues() { Assert.assertEquals(result.toString(), expected); } + @Test + public void testGetQueryBuilderFromCriterionIEqualValues() { // Test case insensitive searches + + final Criterion singleValueCriterion = + buildCriterion("myTestField", Condition.IEQUAL, "value1"); + + QueryBuilder result = + ESUtils.getQueryBuilderFromCriterion( + singleValueCriterion, + false, + new HashMap<>(), + mock(OperationContext.class), + QueryFilterRewriteChain.EMPTY); + + String expected = + "{\n" + + " \"bool\" : {\n" + + " \"should\" : [\n" + + " {\n" + + " \"term\" : {\n" + + " \"myTestField.keyword\" : {\n" + + " \"value\" : \"value1\",\n" + + " \"case_insensitive\" : true,\n" + + " \"boost\" : 1.0\n" + + " }\n" + + " }\n" + + " }\n" + + " ],\n" + + " \"adjust_pure_negative\" : true,\n" + + " \"boost\" : 1.0,\n" + + " \"_name\" : 
\"myTestField\"\n" + + " }\n" + + "}"; + + Assert.assertEquals(result.toString(), expected); + + final Criterion multiValueCriterion = + buildCriterion("myTestField", Condition.IEQUAL, "value1", "value2"); + + result = + ESUtils.getQueryBuilderFromCriterion( + multiValueCriterion, + false, + new HashMap<>(), + mock(OperationContext.class), + QueryFilterRewriteChain.EMPTY); + + expected = + "{\n" + + " \"bool\" : {\n" + + " \"should\" : [\n" + + " {\n" + + " \"term\" : {\n" + + " \"myTestField.keyword\" : {\n" + + " \"value\" : \"value1\",\n" + + " \"case_insensitive\" : true,\n" + + " \"boost\" : 1.0\n" + + " }\n" + + " }\n" + + " },\n" + + " {\n" + + " \"term\" : {\n" + + " \"myTestField.keyword\" : {\n" + + " \"value\" : \"value2\",\n" + + " \"case_insensitive\" : true,\n" + + " \"boost\" : 1.0\n" + + " }\n" + + " }\n" + + " }\n" + + " ],\n" + + " \"adjust_pure_negative\" : true,\n" + + " \"boost\" : 1.0,\n" + + " \"_name\" : \"myTestField\"\n" + + " }\n" + + "}"; + + Assert.assertEquals(result.toString(), expected); + } + @Test public void testGetQueryBuilderFromCriterionContain() { final Criterion singleValueCriterion = @@ -179,6 +259,7 @@ public void testGetQueryBuilderFromCriterionContain() { + " }\n" + " ],\n" + " \"adjust_pure_negative\" : true,\n" + + " \"minimum_should_match\" : \"1\",\n" + " \"boost\" : 1.0\n" + " }\n" + "}"; @@ -222,6 +303,7 @@ public void testGetQueryBuilderFromCriterionContain() { + " }\n" + " ],\n" + " \"adjust_pure_negative\" : true,\n" + + " \"minimum_should_match\" : \"1\",\n" + " \"boost\" : 1.0\n" + " }\n" + "}"; @@ -258,6 +340,7 @@ public void testWildcardQueryBuilderFromCriterionWhenStartsWith() { + " }\n" + " ],\n" + " \"adjust_pure_negative\" : true,\n" + + " \"minimum_should_match\" : \"1\",\n" + " \"boost\" : 1.0\n" + " }\n" + "}"; @@ -301,6 +384,7 @@ public void testWildcardQueryBuilderFromCriterionWhenStartsWith() { + " }\n" + " ],\n" + " \"adjust_pure_negative\" : true,\n" + + " \"minimum_should_match\" : \"1\",\n" + " 
\"boost\" : 1.0\n" + " }\n" + "}"; @@ -337,6 +421,7 @@ public void testWildcardQueryBuilderFromCriterionWhenEndsWith() { + " }\n" + " ],\n" + " \"adjust_pure_negative\" : true,\n" + + " \"minimum_should_match\" : \"1\",\n" + " \"boost\" : 1.0\n" + " }\n" + "}"; @@ -379,6 +464,7 @@ public void testWildcardQueryBuilderFromCriterionWhenEndsWith() { + " }\n" + " ],\n" + " \"adjust_pure_negative\" : true,\n" + + " \"minimum_should_match\" : \"1\",\n" + " \"boost\" : 1.0\n" + " }\n" + "}"; @@ -540,6 +626,7 @@ public void testGetQueryBuilderFromCriterionFieldToExpand() { + " }\n" + " ],\n" + " \"adjust_pure_negative\" : true,\n" + + " \"minimum_should_match\" : \"1\",\n" + " \"boost\" : 1.0\n" + " }\n" + "}"; @@ -582,6 +669,7 @@ public void testGetQueryBuilderFromCriterionFieldToExpand() { + " }\n" + " ],\n" + " \"adjust_pure_negative\" : true,\n" + + " \"minimum_should_match\" : \"1\",\n" + " \"boost\" : 1.0\n" + " }\n" + "}"; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/PropertyDefinitionValidatorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/PropertyDefinitionValidatorTest.java index 22224f16f2210b..2af731a51145e3 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/PropertyDefinitionValidatorTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/PropertyDefinitionValidatorTest.java @@ -2,7 +2,7 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; -import static org.testng.AssertJUnit.assertEquals; +import static org.testng.Assert.assertEquals; import com.linkedin.common.UrnArray; import com.linkedin.common.urn.Urn; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGeneratorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGeneratorTest.java index 
d8d33f4c356bb8..88dd81d953947c 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGeneratorTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGeneratorTest.java @@ -1,11 +1,12 @@ package com.linkedin.metadata.timeline.eventgenerator; -import static org.testng.AssertJUnit.assertEquals; +import static org.testng.Assert.assertEquals; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.StringArray; import com.linkedin.metadata.timeline.data.ChangeEvent; +import com.linkedin.metadata.timeline.data.dataset.DatasetSchemaFieldChangeEvent; import com.linkedin.metadata.timeline.data.dataset.SchemaFieldModificationCategory; import com.linkedin.mxe.SystemMetadata; import com.linkedin.restli.internal.server.util.DataMapUtils; @@ -18,6 +19,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; import org.testng.annotations.Test; @@ -41,18 +43,17 @@ private static void compareDescriptions( changeEvent -> { actualDescriptions.add(changeEvent.getDescription()); }); - assertEquals(expectedDescriptions, actualDescriptions); + assertEquals(actualDescriptions, expectedDescriptions); } private static void compareModificationCategories( Set expectedCategories, List actual) { - Set actualModificationCategories = new HashSet<>(); - actual.forEach( - changeEvent -> { - actualModificationCategories.add( - changeEvent.getParameters().get("modificationCategory").toString()); - }); - assertEquals(expectedCategories, actualModificationCategories); + Set actualModificationCategories = + actual.stream() + .filter(changeEvent -> changeEvent instanceof DatasetSchemaFieldChangeEvent) + .map(changeEvent -> 
changeEvent.getParameters().get("modificationCategory").toString()) + .collect(Collectors.toSet()); + assertEquals(actualModificationCategories, expectedCategories); } private static Aspect getSchemaMetadata(List schemaFieldList) { @@ -236,6 +237,99 @@ public void testDelete() throws Exception { assertEquals(14, actual.size()); } + @Test + public void testSchemaFieldPrimaryKeyChangeRenameAdd() throws Exception { + // When a rename cannot be detected, treated as drop -> add + SchemaMetadataChangeEventGenerator test = new SchemaMetadataChangeEventGenerator(); + + Urn urn = getTestUrn(); + String entity = "dataset"; + String aspect = "schemaMetadata"; + AuditStamp auditStamp = getTestAuditStamp(); + + Aspect from = + getSchemaMetadata( + List.of( + new SchemaField() + .setFieldPath("ID") + .setNativeDataType("NUMBER(16,1)") + .setDescription("My Description"), + new SchemaField() + .setFieldPath("ID2") + .setNativeDataType("NUMBER(16,1)") + .setDescription("My Other Description"))); + from.getValue().setPrimaryKeys(new StringArray(List.of("ID"))); + Aspect to3 = + getSchemaMetadata( + List.of( + new SchemaField() + .setFieldPath("ID") + .setNativeDataType("NUMBER(16,1)") + .setDescription("My Description"), + new SchemaField() + .setFieldPath("ID2") + .setNativeDataType("NUMBER(16,1)") + .setDescription("My Other Description"))); + to3.getValue().setPrimaryKeys(new StringArray(List.of("ID2"))); + List actual = test.getChangeEvents(urn, entity, aspect, from, to3, auditStamp); + compareDescriptions( + Set.of( + "A backwards incompatible change due to a primary key constraint change. " + + "The following fields were removed: 'ID'. 
The following fields were added: 'ID2'."), + actual); + assertEquals(1, actual.size()); + compareModificationCategories(Set.of(SchemaFieldModificationCategory.OTHER.toString()), actual); + + Aspect to4 = + getSchemaMetadata( + List.of( + new SchemaField() + .setFieldPath("IDZ") + .setNativeDataType("NUMBER(16,1)") + .setDescription("My Description"), + new SchemaField() + .setFieldPath("ID2") + .setNativeDataType("NUMBER(16,1)") + .setDescription("My Other Description"))); + to4.getValue().setPrimaryKeys(new StringArray(List.of("ID2"))); + + List actual2 = test.getChangeEvents(urn, entity, aspect, to3, to4, auditStamp); + compareDescriptions( + Set.of( + "A forwards & backwards compatible change due to renaming of the field 'ID to IDZ'."), + actual2); + assertEquals(1, actual2.size()); + compareModificationCategories( + Set.of(SchemaFieldModificationCategory.RENAME.toString()), actual2); + + Aspect to5 = + getSchemaMetadata( + List.of( + new SchemaField() + .setFieldPath("IDZ") + .setNativeDataType("NUMBER(16,1)") + .setDescription("My Description"), + new SchemaField() + .setFieldPath("ID1") + .setNativeDataType("NUMBER(16,1)") + .setDescription("My Third Description"), + new SchemaField() + .setFieldPath("ID2") + .setNativeDataType("NUMBER(16,1)") + .setDescription("My Other Description"))); + to5.getValue().setPrimaryKeys(new StringArray(List.of("ID2"))); + + List actual3 = test.getChangeEvents(urn, entity, aspect, to4, to5, auditStamp); + compareDescriptions( + Set.of( + "A forwards & backwards compatible change due to the newly added field 'ID1'.", + "The description 'My Third Description' for the field 'ID1' has been added."), + actual3); + assertEquals(actual3.size(), 2); + compareModificationCategories( + Set.of(SchemaFieldModificationCategory.OTHER.toString()), actual3); + } + // CHECKSTYLE:OFF private static final String TEST_OBJECT = 
"{\"platformSchema\":{\"com.linkedin.schema.KafkaSchema\":{\"documentSchema\":\"{\\\"type\\\":\\\"record\\\",\\\"name\\\":\\\"SampleHdfsSchema\\\",\\\"namespace\\\":\\\"com.linkedin.dataset\\\",\\\"doc\\\":\\\"Sample HDFS dataset\\\",\\\"fields\\\":[{\\\"name\\\":\\\"field_foo\\\",\\\"type\\\":[\\\"string\\\"]},{\\\"name\\\":\\\"field_bar\\\",\\\"type\\\":[\\\"boolean\\\"]}]}\"}},\"created\":{\"actor\":\"urn:li:corpuser:jdoe\",\"time\":1674291843000},\"lastModified\":{\"actor\":\"urn:li:corpuser:jdoe\",\"time\":1674291843000},\"fields\":[{\"nullable\":false,\"fieldPath\":\"shipment_info\",\"description\":\"Shipment info description\",\"isPartOfKey\":false,\"type\":{\"type\":{\"com.linkedin.schema.RecordType\":{}}},\"recursive\":false,\"nativeDataType\":\"varchar(100)\"},{\"nullable\":false,\"fieldPath\":\"shipment_info.date\",\"description\":\"Shipment info date description\",\"isPartOfKey\":false,\"type\":{\"type\":{\"com.linkedin.schema.DateType\":{}}},\"recursive\":false,\"nativeDataType\":\"Date\"},{\"nullable\":false,\"fieldPath\":\"shipment_info.target\",\"description\":\"Shipment info target description\",\"isPartOfKey\":false,\"type\":{\"type\":{\"com.linkedin.schema.StringType\":{}}},\"recursive\":false,\"nativeDataType\":\"text\"},{\"nullable\":false,\"fieldPath\":\"shipment_info.destination\",\"description\":\"Shipment info destination description\",\"isPartOfKey\":false,\"type\":{\"type\":{\"com.linkedin.schema.StringType\":{}}},\"recursive\":false,\"nativeDataType\":\"varchar(100)\"},{\"nullable\":false,\"fieldPath\":\"shipment_info.geo_info\",\"description\":\"Shipment info geo_info description\",\"isPartOfKey\":false,\"type\":{\"type\":{\"com.linkedin.schema.RecordType\":{}}},\"recursive\":false,\"nativeDataType\":\"varchar(100)\"},{\"nullable\":false,\"fieldPath\":\"shipment_info.geo_info.lat\",\"description\":\"Shipment info geo_info 
lat\",\"isPartOfKey\":false,\"type\":{\"type\":{\"com.linkedin.schema.NumberType\":{}}},\"recursive\":false,\"nativeDataType\":\"float\"},{\"nullable\":false,\"fieldPath\":\"shipment_info.geo_info.lng\",\"description\":\"Shipment info geo_info lng\",\"isPartOfKey\":false,\"type\":{\"type\":{\"com.linkedin.schema.NumberType\":{}}},\"recursive\":false,\"nativeDataType\":\"float\"}],\"schemaName\":\"SampleHdfsSchema\",\"version\":0,\"hash\":\"\",\"platform\":\"urn:li:dataPlatform:hdfs\"}"; diff --git a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/LineageExporter.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/LineageExporter.java index 4b7d81aa04416a..4a2411138ed67b 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/LineageExporter.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/LineageExporter.java @@ -58,7 +58,7 @@ public void exportGraphIndex( Set urns, Set visitedUrns, Set visitedIds, int hops) { Set nextIds = new HashSet<>(); if (!urns.isEmpty()) { - BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery(); + BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery().minimumShouldMatch(1); boolQueryBuilder.must(QueryBuilders.termQuery("relationshipType", "DownstreamOf")); @@ -70,7 +70,6 @@ public void exportGraphIndex( boolQueryBuilder.should( QueryBuilders.termsQuery("destination.urn", batch.toArray(String[]::new))); }); - boolQueryBuilder.minimumShouldMatch(1); // Exclude visited Lists.partition(Arrays.asList(visitedIds.toArray(String[]::new)), queryStatementSize) @@ -144,7 +143,10 @@ public void exportEntityIndex(Set ids, Set visitedIds, int hops) batch -> boolQueryBuilder.should( QueryBuilders.idsQuery().addIds(batch.toArray(String[]::new)))); - boolQueryBuilder.minimumShouldMatch(1); + + if (!boolQueryBuilder.should().isEmpty()) { + boolQueryBuilder.minimumShouldMatch(1); + } // Exclude visited 
Lists.partition(Arrays.asList(visitedIds.toArray(String[]::new)), queryStatementSize) diff --git a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java index 781201f3478f98..e47cdf80281c9a 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java @@ -6,12 +6,10 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; -import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.client.JavaEntityClient; import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.config.cache.EntityDocCountCacheConfiguration; -import com.linkedin.metadata.config.search.CustomConfiguration; import com.linkedin.metadata.config.search.ElasticSearchConfiguration; import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; @@ -73,7 +71,9 @@ public class SampleDataFixtureConfiguration { @Autowired private SearchConfiguration _searchConfiguration; - @Autowired private CustomSearchConfiguration _customSearchConfiguration; + @Autowired + @Qualifier("fixtureCustomSearchConfig") + private CustomSearchConfiguration _customSearchConfiguration; @Autowired private QueryFilterRewriteChain queryFilterRewriteChain; @@ -188,11 +188,6 @@ protected ElasticSearchService longTailEntitySearchService( protected ElasticSearchService entitySearchServiceHelper(EntityIndexBuilders indexBuilders) throws IOException { - CustomConfiguration customConfiguration = new CustomConfiguration(); - customConfiguration.setEnabled(true); - customConfiguration.setFile("search_config_fixture_test.yml"); - 
CustomSearchConfiguration customSearchConfiguration = - customConfiguration.resolve(new YAMLMapper()); ESSearchDAO searchDAO = new ESSearchDAO( @@ -200,8 +195,9 @@ protected ElasticSearchService entitySearchServiceHelper(EntityIndexBuilders ind false, ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, _searchConfiguration, - customSearchConfiguration, - queryFilterRewriteChain); + _customSearchConfiguration, + queryFilterRewriteChain, + true); ESBrowseDAO browseDAO = new ESBrowseDAO( _searchClient, diff --git a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java index a7603f97792e70..889473d32d1a35 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java @@ -67,7 +67,9 @@ public class SearchLineageFixtureConfiguration { @Autowired private SearchConfiguration searchConfiguration; - @Autowired private CustomSearchConfiguration customSearchConfiguration; + @Autowired + @Qualifier("fixtureCustomSearchConfig") + private CustomSearchConfiguration customSearchConfiguration; @Bean(name = "searchLineagePrefix") protected String indexPrefix() { @@ -141,7 +143,7 @@ protected ElasticSearchService entitySearchService( false, ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, searchConfiguration, - null, + customSearchConfiguration, queryFilterRewriteChain); ESBrowseDAO browseDAO = new ESBrowseDAO( diff --git a/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchCommonTestConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchCommonTestConfiguration.java index 547ab1d746dbe7..e84ecf677e3a6d 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchCommonTestConfiguration.java +++ 
b/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchCommonTestConfiguration.java @@ -44,14 +44,22 @@ public SearchConfiguration searchConfiguration() { return searchConfiguration; } - @Bean - public CustomSearchConfiguration customSearchConfiguration() throws Exception { + @Bean("defaultTestCustomSearchConfig") + public CustomSearchConfiguration defaultTestCustomSearchConfig() throws Exception { CustomConfiguration customConfiguration = new CustomConfiguration(); customConfiguration.setEnabled(true); customConfiguration.setFile("search_config_builder_test.yml"); return customConfiguration.resolve(new YAMLMapper()); } + @Bean("fixtureCustomSearchConfig") + public CustomSearchConfiguration fixtureCustomSearchConfig() throws Exception { + CustomConfiguration customConfiguration = new CustomConfiguration(); + customConfiguration.setEnabled(true); + customConfiguration.setFile("search_config_fixture_test.yml"); + return customConfiguration.resolve(new YAMLMapper()); + } + @Bean(name = "queryOperationContext") public OperationContext queryOperationContext() { return TestOperationContexts.systemContextNoSearchAuthorization(); diff --git a/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_full.json b/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_full.json index 0a1cee08414a9d..1aa1b4b5088a1d 100644 --- a/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_full.json +++ b/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_full.json @@ -33,11 +33,13 @@ } ], "adjust_pure_negative" : true, + "minimum_should_match" : "1", "boost" : 1.0 } } ], "adjust_pure_negative" : true, + "minimum_should_match" : "1", "boost" : 1.0 } }, @@ -115,6 +117,7 @@ } ], "adjust_pure_negative" : true, + "minimum_should_match" : "1", "boost" : 1.0 } }, @@ -156,6 +159,7 @@ } ], "adjust_pure_negative" : true, + "minimum_should_match" : "1", "boost" : 1.0 } }, 
@@ -194,6 +198,7 @@ } ], "adjust_pure_negative" : true, + "minimum_should_match" : "1", "boost" : 1.0 } } @@ -212,6 +217,7 @@ } ], "adjust_pure_negative" : true, + "minimum_should_match" : "1", "boost" : 1.0 } } diff --git a/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_full_empty_filters.json b/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_full_empty_filters.json index ab2841d6602d82..5ba0e36456889a 100644 --- a/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_full_empty_filters.json +++ b/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_full_empty_filters.json @@ -33,11 +33,13 @@ } ], "adjust_pure_negative" : true, + "minimum_should_match" : "1", "boost" : 1.0 } } ], "adjust_pure_negative" : true, + "minimum_should_match" : "1", "boost" : 1.0 } }, diff --git a/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_full_multiple_filters.json b/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_full_multiple_filters.json index 39f595e0e8dd2d..938d878a9c8d19 100644 --- a/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_full_multiple_filters.json +++ b/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_full_multiple_filters.json @@ -36,11 +36,13 @@ } ], "adjust_pure_negative" : true, + "minimum_should_match" : "1", "boost" : 1.0 } } ], "adjust_pure_negative" : true, + "minimum_should_match" : "1", "boost" : 1.0 } }, @@ -122,6 +124,7 @@ } ], "adjust_pure_negative" : true, + "minimum_should_match" : "1", "boost" : 1.0 } }, @@ -163,6 +166,7 @@ } ], "adjust_pure_negative" : true, + "minimum_should_match" : "1", "boost" : 1.0 } }, @@ -201,6 +205,7 @@ } ], "adjust_pure_negative" : true, + "minimum_should_match" : "1", "boost" : 1.0 } } @@ -219,6 +224,7 @@ } ], "adjust_pure_negative" : true, + "minimum_should_match" : 
"1", "boost" : 1.0 } } diff --git a/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_limited.json b/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_limited.json index 95d468ec3dac8e..24fbb56065ebf4 100644 --- a/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_limited.json +++ b/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_limited.json @@ -27,6 +27,7 @@ } ], "adjust_pure_negative" : true, + "minimum_should_match" : "1", "boost" : 1.0 } } \ No newline at end of file diff --git a/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_time_query_filters_1.json b/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_time_query_filters_1.json index 327f1d4ff93389..13eb02fb61a4e5 100644 --- a/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_time_query_filters_1.json +++ b/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_time_query_filters_1.json @@ -56,6 +56,7 @@ } ], "adjust_pure_negative" : true, + "minimum_should_match" : "1", "boost" : 1.0 } }, @@ -97,6 +98,7 @@ } ], "adjust_pure_negative" : true, + "minimum_should_match" : "1", "boost" : 1.0 } }, @@ -135,6 +137,7 @@ } ], "adjust_pure_negative" : true, + "minimum_should_match" : "1", "boost" : 1.0 } } @@ -153,6 +156,7 @@ } ], "adjust_pure_negative" : true, + "minimum_should_match" : "1", "boost" : 1.0 } } \ No newline at end of file diff --git a/metadata-jobs/mae-consumer-job/src/main/java/com/linkedin/metadata/kafka/CustomDispatcherServlet.java b/metadata-jobs/mae-consumer-job/src/main/java/com/linkedin/metadata/kafka/CustomDispatcherServlet.java new file mode 100644 index 00000000000000..eea3087dce1212 --- /dev/null +++ b/metadata-jobs/mae-consumer-job/src/main/java/com/linkedin/metadata/kafka/CustomDispatcherServlet.java @@ -0,0 +1,18 @@ +package com.linkedin.metadata.kafka; + +import 
jakarta.servlet.ServletException; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; +import java.io.IOException; +import org.springframework.stereotype.Component; +import org.springframework.web.servlet.DispatcherServlet; + +@Component("dispatcherServlet") +public class CustomDispatcherServlet extends DispatcherServlet { + + @Override + protected void doTrace(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + response.sendError(HttpServletResponse.SC_METHOD_NOT_ALLOWED); + } +} diff --git a/metadata-jobs/mce-consumer-job/src/main/java/com/linkedin/metadata/kafka/CustomDispatcherServlet.java b/metadata-jobs/mce-consumer-job/src/main/java/com/linkedin/metadata/kafka/CustomDispatcherServlet.java new file mode 100644 index 00000000000000..eea3087dce1212 --- /dev/null +++ b/metadata-jobs/mce-consumer-job/src/main/java/com/linkedin/metadata/kafka/CustomDispatcherServlet.java @@ -0,0 +1,18 @@ +package com.linkedin.metadata.kafka; + +import jakarta.servlet.ServletException; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; +import java.io.IOException; +import org.springframework.stereotype.Component; +import org.springframework.web.servlet.DispatcherServlet; + +@Component("dispatcherServlet") +public class CustomDispatcherServlet extends DispatcherServlet { + + @Override + protected void doTrace(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + response.sendError(HttpServletResponse.SC_METHOD_NOT_ALLOWED); + } +} diff --git a/metadata-models/docs/entities/dataPlatform.md b/metadata-models/docs/entities/dataPlatform.md index 58ca83c9c6bbc5..23b7c733cd03e6 100644 --- a/metadata-models/docs/entities/dataPlatform.md +++ b/metadata-models/docs/entities/dataPlatform.md @@ -6,4 +6,4 @@ Examples of data platforms are `redshift`, `hive`, `bigquery`, `looker`, `tablea ## Identity -Data 
Platforms are identified by the name of the technology. A complete list of currently supported data platforms is available [here](https://raw.githubusercontent.com/datahub-project/datahub/master/metadata-service/war/src/main/resources/boot/data_platforms.json). \ No newline at end of file +Data Platforms are identified by the name of the technology. A complete list of currently supported data platforms is available [here](https://github.com/datahub-project/datahub/blob/master/metadata-service/configuration/src/main/resources/bootstrap_mcps/data-platforms.yaml). \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/DocumentationAssociation.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/DocumentationAssociation.pdl index 19404346797bb0..bee331f15b6797 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/common/DocumentationAssociation.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/common/DocumentationAssociation.pdl @@ -15,15 +15,18 @@ record DocumentationAssociation { @Searchable = { "/time": { "fieldName": "documentationAttributionDates", - "fieldType": "DATETIME" + "fieldType": "DATETIME", + "queryByDefault": false, }, "/actor": { "fieldName": "documentationAttributionActors", - "fieldType": "URN" + "fieldType": "URN", + "queryByDefault": false, }, "/source": { "fieldName": "documentationAttributionSources", - "fieldType": "URN" + "fieldType": "URN", + "queryByDefault": false, }, } attribution: optional MetadataAttribution diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/Forms.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/Forms.pdl index 0a97c7d5099ed8..3c05c00fd6fb97 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/common/Forms.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/common/Forms.pdl @@ -13,19 +13,23 @@ record Forms { @Searchable = { "/*/urn": { "fieldType": "URN", - "fieldName": "incompleteForms" + "fieldName": "incompleteForms", + 
"queryByDefault": false, }, "/*/completedPrompts/*/id" : { "fieldType": "KEYWORD", "fieldName": "incompleteFormsCompletedPromptIds", + "queryByDefault": false, }, "/*/incompletePrompts/*/id" : { "fieldType": "KEYWORD", "fieldName": "incompleteFormsIncompletePromptIds", + "queryByDefault": false, }, "/*/completedPrompts/*/lastModified/time" : { "fieldType": "DATETIME", "fieldName": "incompleteFormsCompletedPromptResponseTimes", + "queryByDefault": false, } } incompleteForms: array[FormAssociation] @@ -36,19 +40,23 @@ record Forms { @Searchable = { "/*/urn": { "fieldType": "URN", - "fieldName": "completedForms" + "fieldName": "completedForms", + "queryByDefault": false }, "/*/completedPrompts/*/id" : { "fieldType": "KEYWORD", "fieldName": "completedFormsCompletedPromptIds", + "queryByDefault": false, }, "/*/incompletePrompts/*/id" : { "fieldType": "KEYWORD", "fieldName": "completedFormsIncompletePromptIds", + "queryByDefault": false, }, "/*/completedPrompts/*/lastModified/time" : { "fieldType": "DATETIME", "fieldName": "completedFormsCompletedPromptResponseTimes", + "queryByDefault": false, } } completedForms: array[FormAssociation] @@ -59,7 +67,8 @@ record Forms { @Searchable = { "/*/form": { "fieldType": "URN", - "fieldName": "verifiedForms" + "fieldName": "verifiedForms", + "queryByDefault": false, } } verifications: array[FormVerificationAssociation] = [] diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/GlossaryTermAssociation.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/GlossaryTermAssociation.pdl index a5267bbc635e43..58423ccc2228db 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/common/GlossaryTermAssociation.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/common/GlossaryTermAssociation.pdl @@ -36,15 +36,18 @@ record GlossaryTermAssociation { @Searchable = { "/time": { "fieldName": "termAttributionDates", - "fieldType": "DATETIME" + "fieldType": "DATETIME", + "queryByDefault": false, }, "/actor": { "fieldName": 
"termAttributionActors", - "fieldType": "URN" + "fieldType": "URN", + "queryByDefault": false, }, "/source": { "fieldName": "termAttributionSources", - "fieldType": "URN" + "fieldType": "URN", + "queryByDefault": false, }, } attribution: optional MetadataAttribution diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/IncidentsSummary.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/IncidentsSummary.pdl index e1367a326e24bb..9e4e81656f7c9a 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/common/IncidentsSummary.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/common/IncidentsSummary.pdl @@ -29,7 +29,8 @@ record IncidentsSummary { "fieldType": "URN", "fieldName": "resolvedIncidents", "hasValuesFieldName": "hasResolvedIncidents", - "numValuesFieldName": "numResolvedIncidents" + "numValuesFieldName": "numResolvedIncidents", + "queryByDefault": false, }, "/*/type" : { "fieldType": "KEYWORD", @@ -65,7 +66,8 @@ record IncidentsSummary { "fieldName": "activeIncidents", "hasValuesFieldName": "hasActiveIncidents", "numValuesFieldName": "numActiveIncidents", - "addHasValuesToFilters": true + "addHasValuesToFilters": true, + "queryByDefault": false, }, "/*/type" : { "fieldType": "KEYWORD", diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/RoleAssociation.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/RoleAssociation.pdl index ddd63ed64014b0..05c46dfdf69d76 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/common/RoleAssociation.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/common/RoleAssociation.pdl @@ -18,7 +18,8 @@ record RoleAssociation { "fieldType": "URN", "hasValuesFieldName": "hasRoles", "addToFilters": true, - "filterNameOverride": "Role" + "filterNameOverride": "Role", + "queryByDefault": false, } urn: Urn } \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/SerializedValue.pdl 
b/metadata-models/src/main/pegasus/com/linkedin/common/SerializedValue.pdl new file mode 100644 index 00000000000000..ae1d00c568325d --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/common/SerializedValue.pdl @@ -0,0 +1,42 @@ +namespace com.linkedin.common + +/** + * Captures the serialized value of a (usually) schema-d blob. + */ +record SerializedValue { + /** + * The serialized blob value. + */ + blob: bytes + + /** + * The content-type of the serialized blob value. + */ + contentType: enum SerializedValueContentType { + JSON, + BINARY + } = "JSON" + + /** + * The schema type for the schema that models the object that was serialized + into the blob. + * Absence of this field indicates that the schema is not known. + * If the schema is known, the value should be set to the appropriate schema + * type. + * Use the NONE value if the existing schema categories do not apply. + */ + schemaType: optional enum SerializedValueSchemaType { + AVRO + PROTOBUF + PEGASUS + THRIFT + JSON + NONE + } + + /** + * An optional reference to the schema that models the object. 
+ * e.g., 'com.linkedin.platformresource.slack.SlackConversation' + */ + schemaRef: optional string +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/SubTypes.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/SubTypes.pdl index a1063afe1eae94..1f2ff275c7ae9f 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/common/SubTypes.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/common/SubTypes.pdl @@ -17,7 +17,7 @@ record SubTypes { "fieldType": "KEYWORD", "addToFilters": true, "filterNameOverride": "Sub Type", - "queryByDefault": true + "queryByDefault": false, } } typeNames: array[string] diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/TagAssociation.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/TagAssociation.pdl index 8a58ca97de1956..caed4961272ded 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/common/TagAssociation.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/common/TagAssociation.pdl @@ -21,15 +21,18 @@ record TagAssociation { @Searchable = { "/time": { "fieldName": "tagAttributionDates", - "fieldType": "DATETIME" + "fieldType": "DATETIME", + "queryByDefault": false, }, "/actor": { "fieldName": "tagAttributionActors", - "fieldType": "URN" + "fieldType": "URN", + "queryByDefault": false, }, "/source": { "fieldName": "tagAttributionSources", - "fieldType": "URN" + "fieldType": "URN", + "queryByDefault": false, }, } attribution: optional MetadataAttribution diff --git a/metadata-models/src/main/pegasus/com/linkedin/glossary/GlossaryRelatedTerms.pdl b/metadata-models/src/main/pegasus/com/linkedin/glossary/GlossaryRelatedTerms.pdl index 5e10219235347a..9f5f312a6bccec 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/glossary/GlossaryRelatedTerms.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/glossary/GlossaryRelatedTerms.pdl @@ -24,7 +24,7 @@ record GlossaryRelatedTerms { "/*": { "fieldName": "isRelatedTerms", "fieldType": "URN", 
- "boostScore": 2.0 + "queryByDefault": false, } } isRelatedTerms: optional array[GlossaryTermUrn] @@ -42,7 +42,7 @@ record GlossaryRelatedTerms { "/*": { "fieldName": "hasRelatedTerms", "fieldType": "URN", - "boostScore": 2.0 + "queryByDefault": false, } } hasRelatedTerms: optional array[GlossaryTermUrn] diff --git a/metadata-models/src/main/pegasus/com/linkedin/identity/CorpUserInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/identity/CorpUserInfo.pdl index 382b120fa942a7..53c31daeca4373 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/identity/CorpUserInfo.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/identity/CorpUserInfo.pdl @@ -62,7 +62,7 @@ record CorpUserInfo includes CustomProperties { @Searchable = { "fieldName": "managerLdap", "fieldType": "URN", - "queryByDefault": true + "queryByDefault": false, } managerUrn: optional CorpuserUrn diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/filter/Condition.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/filter/Condition.pdl index a79055ea3db547..193e7628546294 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/filter/Condition.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/filter/Condition.pdl @@ -20,6 +20,11 @@ enum Condition { */ EQUAL + /** + * Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs + */ + IEQUAL + /** * Represent the relation: field is null, e.g. 
platform is null */ diff --git a/metadata-models/src/main/pegasus/com/linkedin/platformresource/PlatformResourceInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/platformresource/PlatformResourceInfo.pdl new file mode 100644 index 00000000000000..32dff19b44a53a --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/platformresource/PlatformResourceInfo.pdl @@ -0,0 +1,57 @@ +namespace com.linkedin.platformresource + +import com.linkedin.common.SerializedValue + +/** + * Platform Resource Info. + * These entities are for miscelaneous data that is used in non-core parts of the system. + * For instance, if we want to persist & retrieve data from auxiliary integrations such as Slack or Microsoft Teams. + */ +@Aspect = { + "name": "platformResourceInfo" +} +record PlatformResourceInfo { + /** + * The type of the resource. + * Intended as a loose specifier of the generic type of the resource. + * Producer is not forced to conform to a specific set of symbols for + * resource types. + * The @PlatformResourceType enumeration offers a paved path for agreed upon + * common terms, but is not required to be followed. + * Example values could be: conversation, user, grant, etc. + * Resource types are indexed for ease of access. + * e.g. Get me all platform resources of type user for the platform looker + */ + @Searchable = { + "fieldType": "KEYWORD" + } + resourceType: string + + /** + * The primary key for this platform resource. + * e.g. for a slack member this would be the memberID. + * primary keys specified here don't need to include any additional specificity for the + dataPlatform + * The @PlatformResourceKey is supposed to represent that + */ + @Searchable = { + "fieldType": "KEYWORD" + } + primaryKey: string + + /** + * The secondary keys this platform resource can be located by. + * I.e., for a slack member this would be email or phone. 
+ */ + @Searchable = { + "/*": { + "fieldType": "KEYWORD" + } + } + secondaryKeys: optional array[string] + + /** + * The serialized value of this platform resource item. + */ + value: optional SerializedValue +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/platformresource/PlatformResourceKey.pdl b/metadata-models/src/main/pegasus/com/linkedin/platformresource/PlatformResourceKey.pdl new file mode 100644 index 00000000000000..8514c73bc26eb8 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/platformresource/PlatformResourceKey.pdl @@ -0,0 +1,28 @@ +namespace com.linkedin.platformresource + +/** + * Key for a Platform Resource. + * Platform Resources are assets that are not part of the core data model. + * They are stored in DataHub primarily to help with application-specific + * use-cases that are not sufficiently generalized to move into the core data model. + * For instance, if we want to persist & retrieve additional user profile data + * from auxiliary integrations such as Slack or Microsoft Teams for resolving details later. + */ +@Aspect = { + "name": "platformResourceKey" +} +record PlatformResourceKey { + /** + * A unique id for this entity. + * There are no constraints on the format of this id, but most implementations + * will choose to use a UUID. + * This id should be globally unique for the entire DataHub instance and + uniquely identify the resource that is being stored, so most + implementations + * will combine logical attributes like platform name, platform instance, + * platform-specific-id and the resource type to create the unique id. + * e.g. slack:slack-instance:slack-user-id:user-info + * or guid(slack, slack-instance, slack-user-id, user-info) etc. 
+ */ + id: string +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/platformresource/PlatformResourceType.pdl b/metadata-models/src/main/pegasus/com/linkedin/platformresource/PlatformResourceType.pdl new file mode 100644 index 00000000000000..2f36eda9141abb --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/platformresource/PlatformResourceType.pdl @@ -0,0 +1,17 @@ +namespace com.linkedin.platformresource + +/** +* A set of symbols for loose agreements between producers and consumers of + platform resources + See @PlatformResourceInfo.resourceType for where this can be populated into + **/ +enum PlatformResourceType { + /** + * e.g. a Slack member resource, Looker user resource, etc. + */ + USER_INFO, + /** + * e.g. a Slack channel + */ + CONVERSATION +} diff --git a/metadata-models/src/main/pegasus/com/linkedin/schema/EditableSchemaFieldInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/schema/EditableSchemaFieldInfo.pdl index 816277bd1e0c96..048c2dcd9f58f6 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/schema/EditableSchemaFieldInfo.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/schema/EditableSchemaFieldInfo.pdl @@ -43,11 +43,13 @@ record EditableSchemaFieldInfo { }, "/tags/*/attribution/actor": { "fieldName": "editedFieldTagAttributionActors", - "fieldType": "URN" + "fieldType": "URN", + "queryByDefault": false, }, "/tags/*/attribution/source": { "fieldName": "editedFieldTagAttributionSources", - "fieldType": "URN" + "fieldType": "URN", + "queryByDefault": false, }, } globalTags: optional GlobalTags @@ -73,11 +75,13 @@ record EditableSchemaFieldInfo { }, "/terms/*/attribution/actor": { "fieldName": "editedFieldTermAttributionActors", - "fieldType": "URN" + "fieldType": "URN", + "queryByDefault": false, }, "/terms/*/attribution/source": { "fieldName": "editedFieldTermAttributionSources", - "fieldType": "URN" + "fieldType": "URN", + "queryByDefault": false, }, } glossaryTerms: 
optional GlossaryTerms diff --git a/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl b/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl index f91e2004401cf9..0b72d376b0be49 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl @@ -99,15 +99,18 @@ record SchemaField { }, "/tags/*/attribution/time": { "fieldName": "fieldTagAttributionDates", - "fieldType": "DATETIME" + "fieldType": "DATETIME", + "queryByDefault": false, }, "/tags/*/attribution/actor": { "fieldName": "fieldTagAttributionActors", - "fieldType": "URN" + "fieldType": "URN", + "queryByDefault": false, }, "/tags/*/attribution/source": { "fieldName": "fieldTagAttributionSources", - "fieldType": "URN" + "fieldType": "URN", + "queryByDefault": false, }, } globalTags: optional GlobalTags @@ -129,15 +132,18 @@ record SchemaField { }, "/terms/*/attribution/time": { "fieldName": "fieldTermAttributionDates", - "fieldType": "DATETIME" + "fieldType": "DATETIME", + "queryByDefault": false, }, "/terms/*/attribution/actor": { "fieldName": "fieldTermAttributionActors", - "fieldType": "URN" + "fieldType": "URN", + "queryByDefault": false, }, "/terms/*/attribution/source": { "fieldName": "fieldTermAttributionSources", - "fieldType": "URN" + "fieldType": "URN", + "queryByDefault": false, }, } glossaryTerms: optional GlossaryTerms diff --git a/metadata-models/src/main/pegasus/com/linkedin/test/TestResults.pdl b/metadata-models/src/main/pegasus/com/linkedin/test/TestResults.pdl index 6f210abf0597f4..c8a99faef88a0d 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/test/TestResults.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/test/TestResults.pdl @@ -15,6 +15,7 @@ record TestResults { "fieldType": "URN", "fieldName": "failingTests" "hasValuesFieldName": "hasFailingTests", + "queryByDefault": false, } } @Relationship = { @@ -33,6 +34,7 @@ record TestResults { 
"fieldType": "URN", "fieldName": "passingTests", "hasValuesFieldName": "hasPassingTests", + "queryByDefault": false, } } @Relationship = { diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml index 7beb08a6b1032f..9b692b51dc2b5f 100644 --- a/metadata-models/src/main/resources/entity-registry.yml +++ b/metadata-models/src/main/resources/entity-registry.yml @@ -615,6 +615,17 @@ entities: aspects: - dataHubConnectionDetails - dataPlatformInstance + - name: platformResource + doc: >- + Platform Resources are assets that are unmodeled and stored outside of + the core data model. They are stored in DataHub primarily to help with + application-specific use-cases that are not sufficiently generalized to move into the core data model. + category: core + keyAspect: platformResourceKey + aspects: + - dataPlatformInstance + - platformResourceInfo + - status events: plugins: aspectPayloadValidators: diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/ObjectMapperContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/ObjectMapperContext.java index 2e96e48338a661..a25deee23850a2 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/ObjectMapperContext.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/ObjectMapperContext.java @@ -3,7 +3,9 @@ import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.core.StreamReadConstraints; import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import com.linkedin.metadata.Constants; +import java.util.List; import java.util.Optional; import javax.annotation.Nonnull; import lombok.Builder; @@ -14,23 +16,29 @@ public class ObjectMapperContext implements ContextInterface { public static ObjectMapper defaultMapper = new ObjectMapper(); + public static 
ObjectMapper defaultYamlMapper = new ObjectMapper(new YAMLFactory()); static { defaultMapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); - int maxSize = - Integer.parseInt( - System.getenv() - .getOrDefault( - Constants.INGESTION_MAX_SERIALIZED_STRING_LENGTH, - Constants.MAX_JACKSON_STRING_SIZE)); - defaultMapper - .getFactory() - .setStreamReadConstraints(StreamReadConstraints.builder().maxStringLength(maxSize).build()); + + for (ObjectMapper mapper : List.of(defaultMapper, defaultYamlMapper)) { + int maxSize = + Integer.parseInt( + System.getenv() + .getOrDefault( + Constants.INGESTION_MAX_SERIALIZED_STRING_LENGTH, + Constants.MAX_JACKSON_STRING_SIZE)); + mapper + .getFactory() + .setStreamReadConstraints( + StreamReadConstraints.builder().maxStringLength(maxSize).build()); + } } public static ObjectMapperContext DEFAULT = ObjectMapperContext.builder().build(); @Nonnull private final ObjectMapper objectMapper; + @Nonnull private final ObjectMapper yamlMapper; @Override public Optional getCacheKeyComponent() { @@ -42,7 +50,10 @@ public ObjectMapperContext build() { if (this.objectMapper == null) { objectMapper(defaultMapper); } - return new ObjectMapperContext(this.objectMapper); + if (this.yamlMapper == null) { + yamlMapper(defaultYamlMapper); + } + return new ObjectMapperContext(this.objectMapper, this.yamlMapper); } } } diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java index fc61ccf79544ff..61bf40f54817ee 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java @@ -402,6 +402,11 @@ public ObjectMapper getObjectMapper() { return objectMapperContext.getObjectMapper(); } + @Nonnull + public ObjectMapper getYamlMapper() { + return 
objectMapperContext.getYamlMapper(); + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/metadata-service/README.md b/metadata-service/README.md index 8aec1ecc3ab92a..0c7085b10da713 100644 --- a/metadata-service/README.md +++ b/metadata-service/README.md @@ -1291,6 +1291,7 @@ where valid conditions include - CONTAIN - END_WITH - EQUAL + - IEQUAL (support case insensitive values) - GREATER_THAN - GREATER_THAN_OR_EQUAL_TO - LESS_THAN diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index 5e07bfc479e93c..ef3ae76d81fae3 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -176,8 +176,8 @@ cassandra: elasticsearch: host: ${ELASTICSEARCH_HOST:localhost} port: ${ELASTICSEARCH_PORT:9200} - threadCount: ${ELASTICSEARCH_THREAD_COUNT:1} - connectionRequestTimeout: ${ELASTICSEARCH_CONNECTION_REQUEST_TIMEOUT:0} + threadCount: ${ELASTICSEARCH_THREAD_COUNT:2} + connectionRequestTimeout: ${ELASTICSEARCH_CONNECTION_REQUEST_TIMEOUT:5000} username: ${ELASTICSEARCH_USERNAME:#{null}} password: ${ELASTICSEARCH_PASSWORD:#{null}} pathPrefix: ${ELASTICSEARCH_PATH_PREFIX:#{null}} @@ -347,8 +347,6 @@ bootstrap: file: ${BOOTSTRAP_POLICIES_FILE:classpath:boot/policies.json} # eg for local file # file: "file:///datahub/datahub-gms/resources/custom-policies.json" - ownershipTypes: - file: ${BOOTSTRAP_OWNERSHIP_TYPES_FILE:classpath:boot/ownership_types.json} servlets: waitTimeout: ${BOOTSTRAP_SERVLETS_WAITTIMEOUT:60} # Total waiting time in seconds for servlets to initialize @@ -357,6 +355,8 @@ systemUpdate: maxBackOffs: ${BOOTSTRAP_SYSTEM_UPDATE_MAX_BACK_OFFS:50} backOffFactor: ${BOOTSTRAP_SYSTEM_UPDATE_BACK_OFF_FACTOR:2} # Multiplicative factor for back off, default values will result in waiting 5min 15s waitForSystemUpdate: 
${BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE:true} + bootstrap: + mcpConfig: ${SYSTEM_UPDATE_BOOTSTRAP_MCP_CONFIG:bootstrap_mcps.yaml} dataJobNodeCLL: enabled: ${BOOTSTRAP_SYSTEM_UPDATE_DATA_JOB_NODE_CLL_ENABLED:false} batchSize: ${BOOTSTRAP_SYSTEM_UPDATE_DATA_JOB_NODE_CLL_BATCH_SIZE:1000} @@ -436,6 +436,7 @@ featureFlags: businessAttributeEntityEnabled: ${BUSINESS_ATTRIBUTE_ENTITY_ENABLED:false} # Enables business attribute entity which can be associated with field of dataset dataContractsEnabled: ${DATA_CONTRACTS_ENABLED:true} # Enables the Data Contracts feature (Tab) in the UI showSeparateSiblings: ${SHOW_SEPARATE_SIBLINGS:false} # If turned on, all siblings will be separated with no way to get to a "combined" sibling view + editableDatasetNameEnabled: ${EDITABLE_DATASET_NAME_ENABLED:false} # Enables the ability to edit the dataset name in the UI entityChangeEvents: enabled: ${ENABLE_ENTITY_CHANGE_EVENTS_HOOK:true} @@ -541,6 +542,8 @@ metadataChangeProposal: sideEffects: schemaField: enabled: ${MCP_SIDE_EFFECTS_SCHEMA_FIELD_ENABLED:false} + dataProductUnset: + enabled: ${MCP_SIDE_EFFECTS_DATA_PRODUCT_UNSET_ENABLED:true} throttle: updateIntervalMs: ${MCP_THROTTLE_UPDATE_INTERVAL_MS:60000} diff --git a/metadata-service/configuration/src/main/resources/bootstrap_mcps.yaml b/metadata-service/configuration/src/main/resources/bootstrap_mcps.yaml new file mode 100644 index 00000000000000..b1612f95f92198 --- /dev/null +++ b/metadata-service/configuration/src/main/resources/bootstrap_mcps.yaml @@ -0,0 +1,36 @@ +bootstrap: + # Defaults + # force: false + # blocking: false + # async: true + # optional: false + templates: + # Bootstrap + - name: root-user + version: v1 + blocking: true + async: false + mcps_location: "bootstrap_mcps/root-user.yaml" + + - name: data-platforms + version: v1 + mcps_location: "bootstrap_mcps/data-platforms.yaml" + + - name: data-types + version: v1 + mcps_location: "bootstrap_mcps/data-types.yaml" + + - name: ownership-types + version: v1 
+ mcps_location: "bootstrap_mcps/ownership-types.yaml" + + - name: roles + version: v1 + mcps_location: "bootstrap_mcps/roles.yaml" + + # Ingestion Recipes + - name: ingestion-datahub-gc + version: v1 + optional: true + mcps_location: "bootstrap_mcps/ingestion-datahub-gc.yaml" + values_env: "DATAHUB_GC_BOOTSTRAP_VALUES" \ No newline at end of file diff --git a/metadata-service/configuration/src/main/resources/bootstrap_mcps/data-platforms.yaml b/metadata-service/configuration/src/main/resources/bootstrap_mcps/data-platforms.yaml new file mode 100644 index 00000000000000..24d5da22805cbe --- /dev/null +++ b/metadata-service/configuration/src/main/resources/bootstrap_mcps/data-platforms.yaml @@ -0,0 +1,709 @@ +# Instructions to add additional entry +# 1. Add new entry to this list +# 2. Increment version in bootstrap_mcps.yaml for the entry referring to this file +- entityUrn: urn:li:dataPlatform:adlsGen1 + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "/" + name: adlsGen1 + displayName: Azure Data Lake (Gen 1) + type: FILE_SYSTEM + logoUrl: "/assets/platforms/adlslogo.png" +- entityUrn: urn:li:dataPlatform:adlsGen2 + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "/" + name: adlsGen2 + displayName: Azure Data Lake (Gen 2) + type: FILE_SYSTEM + logoUrl: "/assets/platforms/adlslogo.png" +- entityUrn: urn:li:dataPlatform:airflow + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: airflow + displayName: Airflow + type: OTHERS + logoUrl: "/assets/platforms/airflowlogo.png" +- entityUrn: urn:li:dataPlatform:ambry + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." 
+ name: ambry + displayName: Ambry + type: OBJECT_STORE +- entityUrn: urn:li:dataPlatform:clickhouse + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: clickhouse + displayName: ClickHouse + type: RELATIONAL_DB + logoUrl: "/assets/platforms/clickhouselogo.png" +- entityUrn: urn:li:dataPlatform:cockroachdb + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: cockroachdb + displayName: CockroachDb + type: RELATIONAL_DB + logoUrl: "/assets/platforms/cockroachdblogo.png" +- entityUrn: urn:li:dataPlatform:couchbase + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: couchbase + displayName: Couchbase + type: KEY_VALUE_STORE + logoUrl: "/assets/platforms/couchbaselogo.png" +- entityUrn: urn:li:dataPlatform:dagster + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "/" + name: dagster + displayName: Dagster + type: OTHERS + logoUrl: "/assets/platforms/dagsterlogo.svg" +- entityUrn: urn:li:dataPlatform:external + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: external + displayName: External Source + type: OTHERS +- entityUrn: urn:li:dataPlatform:hdfs + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "/" + name: hdfs + displayName: HDFS + type: FILE_SYSTEM + logoUrl: "/assets/platforms/hadooplogo.png" +- entityUrn: urn:li:dataPlatform:hana + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." 
+ name: hana + displayName: SAP HANA + type: RELATIONAL_DB + logoUrl: "/assets/platforms/hanalogo.png" +- entityUrn: urn:li:dataPlatform:hive + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: hive + displayName: Hive + type: FILE_SYSTEM + logoUrl: "/assets/platforms/hivelogo.png" +- entityUrn: urn:li:dataPlatform:iceberg + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: iceberg + displayName: Iceberg + type: FILE_SYSTEM + logoUrl: "/assets/platforms/iceberglogo.png" +- entityUrn: urn:li:dataPlatform:s3 + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "/" + name: s3 + displayName: AWS S3 + type: FILE_SYSTEM + logoUrl: "/assets/platforms/s3.png" +- entityUrn: urn:li:dataPlatform:kafka + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: kafka + displayName: Kafka + type: MESSAGE_BROKER + logoUrl: "/assets/platforms/kafkalogo.png" +- entityUrn: urn:li:dataPlatform:kafka-connect + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: kafka-connect + displayName: Kafka Connect + type: OTHERS + logoUrl: "/assets/platforms/kafkalogo.png" +- entityUrn: urn:li:dataPlatform:kusto + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: kusto + displayName: Kusto + type: OLAP_DATASTORE + logoUrl: "/assets/platforms/kustologo.png" +- entityUrn: urn:li:dataPlatform:mode + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." 
+ name: mode + displayName: Mode + type: KEY_VALUE_STORE + logoUrl: "/assets/platforms/modelogo.png" +- entityUrn: urn:li:dataPlatform:mongodb + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: mongodb + displayName: MongoDB + type: KEY_VALUE_STORE + logoUrl: "/assets/platforms/mongodblogo.png" +- entityUrn: urn:li:dataPlatform:mysql + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: mysql + displayName: MySQL + type: RELATIONAL_DB + logoUrl: "/assets/platforms/mysqllogo.png" +- entityUrn: urn:li:dataPlatform:db2 + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: db2 + displayName: DB2 + type: RELATIONAL_DB + logoUrl: "/assets/platforms/db2logo.png" +- entityUrn: urn:li:dataPlatform:mariadb + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: mariadb + displayName: MariaDB + type: RELATIONAL_DB + logoUrl: "/assets/platforms/mariadblogo.png" +- entityUrn: urn:li:dataPlatform:OpenApi + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: openapi + displayName: OpenAPI + type: OTHERS + logoUrl: "/assets/platforms/openapilogo.png" +- entityUrn: urn:li:dataPlatform:oracle + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: oracle + displayName: Oracle + type: RELATIONAL_DB + logoUrl: "/assets/platforms/oraclelogo.png" +- entityUrn: urn:li:dataPlatform:pinot + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." 
+ name: pinot + displayName: Pinot + type: OLAP_DATASTORE + logoUrl: "/assets/platforms/pinotlogo.png" +- entityUrn: urn:li:dataPlatform:postgres + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: postgres + displayName: PostgreSQL + type: RELATIONAL_DB + logoUrl: "/assets/platforms/postgreslogo.png" +- entityUrn: urn:li:dataPlatform:prefect + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: prefect + displayName: Prefect + type: OTHERS + logoUrl: "/assets/platforms/prefectlogo.png" +- entityUrn: urn:li:dataPlatform:presto + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: presto + displayName: Presto + type: QUERY_ENGINE + logoUrl: "/assets/platforms/prestologo.png" +- entityUrn: urn:li:dataPlatform:presto + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: presto + displayName: Presto + type: QUERY_ENGINE + logoUrl: "/assets/platforms/prestologo.png" +- entityUrn: urn:li:dataPlatform:tableau + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: tableau + displayName: Tableau + type: OTHERS + logoUrl: "/assets/platforms/tableaulogo.svg" +- entityUrn: urn:li:dataPlatform:teradata + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: teradata + displayName: Teradata + type: RELATIONAL_DB + logoUrl: "/assets/platforms/teradatalogo.png" +- entityUrn: urn:li:dataPlatform:voldemort + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." 
+ name: voldemort + displayName: Voldemort + type: KEY_VALUE_STORE +- entityUrn: urn:li:dataPlatform:snowflake + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: snowflake + displayName: Snowflake + type: RELATIONAL_DB + logoUrl: "/assets/platforms/snowflakelogo.png" +- entityUrn: urn:li:dataPlatform:redshift + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: redshift + displayName: Redshift + type: RELATIONAL_DB + logoUrl: "/assets/platforms/redshiftlogo.png" +- entityUrn: urn:li:dataPlatform:mssql + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: mssql + displayName: SQL Server + type: RELATIONAL_DB + logoUrl: "/assets/platforms/mssqllogo.png" +- entityUrn: urn:li:dataPlatform:bigquery + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: bigquery + displayName: BigQuery + type: RELATIONAL_DB + logoUrl: "/assets/platforms/bigquerylogo.png" +- entityUrn: urn:li:dataPlatform:druid + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: druid + displayName: Druid + type: OLAP_DATASTORE + logoUrl: "/assets/platforms/druidlogo.png" +- entityUrn: urn:li:dataPlatform:looker + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: looker + displayName: Looker + type: OTHERS + logoUrl: "/assets/platforms/lookerlogo.svg" +- entityUrn: urn:li:dataPlatform:feast + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." 
+ name: feast + displayName: Feast + type: OTHERS + logoUrl: "/assets/platforms/feastlogo.png" +- entityUrn: urn:li:dataPlatform:sagemaker + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: sagemaker + displayName: SageMaker + type: OTHERS + logoUrl: "/assets/platforms/sagemakerlogo.png" +- entityUrn: urn:li:dataPlatform:mlflow + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: mlflow + displayName: MLflow + type: OTHERS + logoUrl: "/assets/platforms/mlflowlogo.png" +- entityUrn: urn:li:dataPlatform:glue + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: glue + displayName: Glue + type: OTHERS + logoUrl: "/assets/platforms/gluelogo.png" +- entityUrn: urn:li:dataPlatform:redash + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: redash + displayName: Redash + type: OTHERS + logoUrl: "/assets/platforms/redashlogo.png" +- entityUrn: urn:li:dataPlatform:athena + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: athena + displayName: AWS Athena + type: RELATIONAL_DB + logoUrl: "/assets/platforms/awsathenalogo.png" +- entityUrn: urn:li:dataPlatform:spark + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: spark + displayName: Spark + type: OTHERS + logoUrl: "/assets/platforms/sparklogo.png" +- entityUrn: urn:li:dataPlatform:dbt + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." 
+ name: dbt + displayName: dbt + type: OTHERS + logoUrl: "/assets/platforms/dbtlogo.png" +- entityUrn: urn:li:dataPlatform:elasticsearch + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: elasticsearch + displayName: Elasticsearch + type: OTHERS + logoUrl: "/assets/platforms/elasticsearchlogo.png" +- entityUrn: urn:li:dataPlatform:great-expectations + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + name: Great Expectations + displayName: Great Expectations + type: OTHERS + logoUrl: "/assets/platforms/greatexpectationslogo.png" + datasetNameDelimiter: "." +- entityUrn: urn:li:dataPlatform:powerbi + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: powerbi + displayName: Power BI + type: OTHERS + logoUrl: "/assets/platforms/powerbilogo.png" +- entityUrn: urn:li:dataPlatform:presto-on-hive + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: presto-on-hive + displayName: Presto on Hive + type: FILE_SYSTEM + logoUrl: "/assets/platforms/prestoonhivelogo.png" +- entityUrn: urn:li:dataPlatform:metabase + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: metabase + displayName: Metabase + type: OTHERS + logoUrl: "/assets/platforms/metabaselogo.svg" +- entityUrn: urn:li:dataPlatform:nifi + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: nifi + displayName: NiFi + type: OTHERS + logoUrl: "/assets/platforms/nifilogo.svg" +- entityUrn: urn:li:dataPlatform:superset + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." 
+ name: superset + displayName: Superset + type: OTHERS + logoUrl: "/assets/platforms/supersetlogo.png" +- entityUrn: urn:li:dataPlatform:trino + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: trino + displayName: Trino + type: QUERY_ENGINE + logoUrl: "/assets/platforms/trinologo.png" +- entityUrn: urn:li:dataPlatform:pulsar + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: pulsar + displayName: Pulsar + type: MESSAGE_BROKER + logoUrl: "/assets/platforms/pulsarlogo.png" +- entityUrn: urn:li:dataPlatform:salesforce + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: salesforce + displayName: Salesforce + type: OTHERS + logoUrl: "/assets/platforms/logo-salesforce.svg" +- entityUrn: urn:li:dataPlatform:unknown + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: Unknown Platform + displayName: N/A + type: OTHERS +- entityUrn: urn:li:dataPlatform:delta-lake + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: delta-lake + displayName: Delta Lake + type: OTHERS + logoUrl: "/assets/platforms/deltalakelogo.png" +- entityUrn: urn:li:dataPlatform:databricks + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: databricks + displayName: Databricks + type: OTHERS + logoUrl: "/assets/platforms/databrickslogo.png" +- entityUrn: urn:li:dataPlatform:vertica + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." 
+ name: vertica + displayName: Vertica + type: OLAP_DATASTORE + logoUrl: "/assets/platforms/verticalogo.png" +- entityUrn: urn:li:dataPlatform:gcs + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "/" + name: gcs + displayName: Google Cloud Storage + type: FILE_SYSTEM + logoUrl: "/assets/platforms/gcslogo.svg" +- entityUrn: urn:li:dataPlatform:slack + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: Slack + displayName: Slack + type: OTHERS + logoUrl: "/assets/platforms/slacklogo.png" +- entityUrn: urn:li:dataPlatform:microsoft-teams + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: Microsoft Teams + displayName: Microsoft Teams + type: OTHERS + logoUrl: "/assets/platforms/teamslogo.png" +- entityUrn: urn:li:dataPlatform:dynamodb + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: dynamodb + displayName: DynamoDB + type: KEY_VALUE_STORE + logoUrl: "/assets/platforms/dynamodblogo.png" +- entityUrn: urn:li:dataPlatform:fivetran + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: fivetran + displayName: Fivetran + type: OTHERS + logoUrl: "/assets/platforms/fivetranlogo.png" +- entityUrn: urn:li:dataPlatform:csv + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: csv + displayName: CSV + type: OTHERS + logoUrl: "/assets/platforms/csv-logo.png" +- entityUrn: urn:li:dataPlatform:qlik-sense + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." 
+ name: qlik-sense + displayName: Qlik Sense + type: OTHERS + logoUrl: "/assets/platforms/qliklogo.png" +- entityUrn: urn:li:dataPlatform:file + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: file + displayName: File + type: OTHERS + logoUrl: "/assets/platforms/file-logo.svg" +- entityUrn: urn:li:dataPlatform:excel + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + name: excel + displayName: Excel + type: OTHERS + datasetNameDelimiter: "/" + logoUrl: "/assets/platforms/excel-logo.svg" +- entityUrn: urn:li:dataPlatform:sigma + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: sigma + displayName: Sigma + type: OTHERS + logoUrl: "/assets/platforms/sigmalogo.png" +- entityUrn: urn:li:dataPlatform:sac + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: sac + displayName: SAP Analytics Cloud + type: OTHERS + logoUrl: "/assets/platforms/saclogo.svg" diff --git a/metadata-service/configuration/src/main/resources/bootstrap_mcps/data-types.yaml b/metadata-service/configuration/src/main/resources/bootstrap_mcps/data-types.yaml new file mode 100644 index 00000000000000..e73288f8171438 --- /dev/null +++ b/metadata-service/configuration/src/main/resources/bootstrap_mcps/data-types.yaml @@ -0,0 +1,43 @@ +# Instructions to add additional entry +# 1. Add new entry to this list +# 2. Increment version in bootstrap_mcps.yaml for the entry referring to this file +- entityUrn: urn:li:dataType:datahub.string + entityType: dataType + aspectName: dataTypeInfo + changeType: UPSERT + aspect: + qualifiedName: datahub.string + displayName: String + description: A string of characters. 
+- entityUrn: urn:li:dataType:datahub.number + entityType: dataType + aspectName: dataTypeInfo + changeType: UPSERT + aspect: + qualifiedName: datahub.number + displayName: Number + description: An integer or decimal number. +- entityUrn: urn:li:dataType:datahub.urn + entityType: dataType + aspectName: dataTypeInfo + changeType: UPSERT + aspect: + qualifiedName: datahub.urn + displayName: Urn + description: A unique identifier for a DataHub entity. +- entityUrn: urn:li:dataType:datahub.rich_text + entityType: dataType + aspectName: dataTypeInfo + changeType: UPSERT + aspect: + qualifiedName: datahub.rich_text + displayName: Rich Text + description: An attributed string of characters. +- entityUrn: urn:li:dataType:datahub.date + entityType: dataType + aspectName: dataTypeInfo + changeType: UPSERT + aspect: + qualifiedName: datahub.date + displayName: Date + description: A specific day, without time. \ No newline at end of file diff --git a/metadata-service/configuration/src/main/resources/bootstrap_mcps/ingestion-datahub-gc.yaml b/metadata-service/configuration/src/main/resources/bootstrap_mcps/ingestion-datahub-gc.yaml new file mode 100644 index 00000000000000..aa3c768dcf1b89 --- /dev/null +++ b/metadata-service/configuration/src/main/resources/bootstrap_mcps/ingestion-datahub-gc.yaml @@ -0,0 +1,33 @@ +# Instructions to add additional entry or update on the target system +# 1. Edit this file +# 2. 
Increment version in bootstrap_mcps.yaml for the entry referring to this file +- entityType: dataHubIngestionSource + entityUrn: urn:li:dataHubIngestionSource:datahub-gc + aspectName: dataHubIngestionSourceInfo + changeType: UPSERT + aspect: + type: 'datahub-gc' + name: '{{ingestion.name}}{{^ingestion.name}}datahub-gc{{/ingestion.name}}' + schedule: + timezone: '{{schedule.timezone}}{{^schedule.timezone}}UTC{{/schedule.timezone}}' + interval: '{{schedule.interval}}{{^schedule.interval}}0 1 * * *{{/schedule.interval}}' + config: + version: 0.14.1.1rc5 + recipe: + source: + type: 'datahub-gc' + config: + cleanup_expired_tokens: {{cleanup_expired_tokens}}{{^cleanup_expired_tokens}}false{{/cleanup_expired_tokens}} + truncate_indices: {{truncate_indices}}{{^truncate_indices}}true{{/truncate_indices}} + dataprocess_cleanup: + retention_days: {{dataprocess_cleanup.retention_days}}{{^dataprocess_cleanup.retention_days}}10{{/dataprocess_cleanup.retention_days}} + delete_empty_data_jobs: {{dataprocess_cleanup.delete_empty_data_jobs}}{{^dataprocess_cleanup.delete_empty_data_jobs}}true{{/dataprocess_cleanup.delete_empty_data_jobs}} + delete_empty_data_flows: {{dataprocess_cleanup.delete_empty_data_flows}}{{^dataprocess_cleanup.delete_empty_data_flows}}true{{/dataprocess_cleanup.delete_empty_data_flows}} + hard_delete_entities: {{dataprocess_cleanup.hard_delete_entities}}{{^dataprocess_cleanup.hard_delete_entities}}false{{/dataprocess_cleanup.hard_delete_entities}} + keep_last_n: {{dataprocess_cleanup.keep_last_n}}{{^dataprocess_cleanup.keep_last_n}}5{{/dataprocess_cleanup.keep_last_n}} + soft_deleted_entities_cleanup: + retention_days: {{soft_deleted_entities_cleanup.retention_days}}{{^soft_deleted_entities_cleanup.retention_days}}10{{/soft_deleted_entities_cleanup.retention_days}} + extraArgs: {} + debugMode: false + executorId: default + headers: {} \ No newline at end of file diff --git a/metadata-service/configuration/src/main/resources/bootstrap_mcps/ownership-types.yaml 
b/metadata-service/configuration/src/main/resources/bootstrap_mcps/ownership-types.yaml new file mode 100644 index 00000000000000..23d1f37b76138c --- /dev/null +++ b/metadata-service/configuration/src/main/resources/bootstrap_mcps/ownership-types.yaml @@ -0,0 +1,39 @@ +# Instructions to add additional entry +# 1. Add new entry to this list +# 2. Increment version in bootstrap_mcps.yaml for the entry referring to this file +- entityUrn: urn:li:ownershipType:__system__technical_owner + entityType: ownershipType + aspectName: ownershipTypeInfo + changeType: UPSERT + aspect: + name: Technical Owner + description: Involved in the production, maintenance, or distribution of the asset(s). + created: {{&auditStamp}} + lastModified: {{&auditStamp}} +- entityUrn: urn:li:ownershipType:__system__business_owner + entityType: ownershipType + aspectName: ownershipTypeInfo + changeType: UPSERT + aspect: + name: Business Owner + description: Principle stakeholders or domain experts associated with the asset(s). + created: {{&auditStamp}} + lastModified: {{&auditStamp}} +- entityUrn: urn:li:ownershipType:__system__data_steward + entityType: ownershipType + aspectName: ownershipTypeInfo + changeType: UPSERT + aspect: + name: Data Steward + description: Involved in governance of the asset(s). + created: {{&auditStamp}} + lastModified: {{&auditStamp}} +- entityUrn: urn:li:ownershipType:__system__none + entityType: ownershipType + aspectName: ownershipTypeInfo + changeType: UPSERT + aspect: + name: None + description: No ownership type specified. + created: {{&auditStamp}} + lastModified: {{&auditStamp}} diff --git a/metadata-service/configuration/src/main/resources/bootstrap_mcps/roles.yaml b/metadata-service/configuration/src/main/resources/bootstrap_mcps/roles.yaml new file mode 100644 index 00000000000000..274b4fce1c3bff --- /dev/null +++ b/metadata-service/configuration/src/main/resources/bootstrap_mcps/roles.yaml @@ -0,0 +1,28 @@ +# Instructions to add additional entry +# 1. 
Add new entry to this list +# 2. Increment version in bootstrap_mcps.yaml for the entry referring to this file +- entityUrn: urn:li:dataHubRole:Admin + entityType: dataHubRole + aspectName: dataHubRoleInfo + changeType: UPSERT + aspect: + name: Admin + description: Can do everything on the platform. + editable: false +- entityUrn: urn:li:dataHubRole:Editor + entityType: dataHubRole + aspectName: dataHubRoleInfo + changeType: UPSERT + aspect: + name: Editor + description: Can read and edit all metadata. Cannot take administrative actions. + editable: false +- entityUrn: urn:li:dataHubRole:Reader + entityType: dataHubRole + aspectName: dataHubRoleInfo + changeType: UPSERT + aspect: + name: Reader + description: Can read all metadata. Cannot edit anything by default, or take administrative + actions. + editable: false \ No newline at end of file diff --git a/metadata-service/configuration/src/main/resources/bootstrap_mcps/root-user.yaml b/metadata-service/configuration/src/main/resources/bootstrap_mcps/root-user.yaml new file mode 100644 index 00000000000000..40d33468f0168a --- /dev/null +++ b/metadata-service/configuration/src/main/resources/bootstrap_mcps/root-user.yaml @@ -0,0 +1,8 @@ +- entityUrn: urn:li:corpuser:datahub + entityType: corpuser + aspectName: corpUserInfo + changeType: UPSERT + aspect: + active: true + displayName: DataHub + title: DataHub Root User diff --git a/metadata-service/factories/build.gradle b/metadata-service/factories/build.gradle index 76e4de70071f56..5e52efd245b7fa 100644 --- a/metadata-service/factories/build.gradle +++ b/metadata-service/factories/build.gradle @@ -48,7 +48,7 @@ dependencies { implementation spec.product.pegasus.restliSpringBridge implementation spec.product.pegasus.restliDocgen implementation externalDependency.jline - implementation externalDependency.common + implementation externalDependency.commonsIo testImplementation externalDependency.springBootTest testImplementation externalDependency.mockito diff --git 
a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java index 67fe2dd6d605de..4a2095685abe1f 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java @@ -7,6 +7,7 @@ import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import com.linkedin.metadata.aspect.plugins.hooks.MCPSideEffect; import com.linkedin.metadata.aspect.plugins.hooks.MutationHook; +import com.linkedin.metadata.dataproducts.sideeffects.DataProductUnsetSideEffect; import com.linkedin.metadata.schemafields.sideeffects.SchemaFieldSideEffect; import com.linkedin.metadata.timeline.eventgenerator.EntityChangeEventGeneratorRegistry; import com.linkedin.metadata.timeline.eventgenerator.SchemaMetadataChangeEventGenerator; @@ -80,4 +81,27 @@ public MCPSideEffect schemaFieldSideEffect() { .setConfig(config) .setEntityChangeEventGeneratorRegistry(entityChangeEventGeneratorRegistry); } + + @Bean + @ConditionalOnProperty( + name = "metadataChangeProposal.sideEffects.dataProductUnset.enabled", + havingValue = "true") + public MCPSideEffect dataProductUnsetSideEffect() { + AspectPluginConfig config = + AspectPluginConfig.builder() + .enabled(true) + .className(DataProductUnsetSideEffect.class.getName()) + .supportedOperations( + List.of("CREATE", "CREATE_ENTITY", "UPSERT", "RESTATE", "DELETE", "PATCH")) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName(Constants.DATA_PRODUCT_ENTITY_NAME) + .aspectName(Constants.DATA_PRODUCT_PROPERTIES_ASPECT_NAME) + .build())) + .build(); + + log.info("Initialized {}", SchemaFieldSideEffect.class.getName()); + return new 
DataProductUnsetSideEffect().setConfig(config); + } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/BootstrapManagerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/BootstrapManagerFactory.java index 9e29883f439a74..ffc739e905cd68 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/BootstrapManagerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/BootstrapManagerFactory.java @@ -11,15 +11,10 @@ import com.linkedin.metadata.boot.dependencies.BootstrapDependency; import com.linkedin.metadata.boot.steps.IndexDataPlatformsStep; import com.linkedin.metadata.boot.steps.IngestDataPlatformInstancesStep; -import com.linkedin.metadata.boot.steps.IngestDataPlatformsStep; -import com.linkedin.metadata.boot.steps.IngestDataTypesStep; import com.linkedin.metadata.boot.steps.IngestDefaultGlobalSettingsStep; import com.linkedin.metadata.boot.steps.IngestEntityTypesStep; -import com.linkedin.metadata.boot.steps.IngestOwnershipTypesStep; import com.linkedin.metadata.boot.steps.IngestPoliciesStep; import com.linkedin.metadata.boot.steps.IngestRetentionPoliciesStep; -import com.linkedin.metadata.boot.steps.IngestRolesStep; -import com.linkedin.metadata.boot.steps.IngestRootUserStep; import com.linkedin.metadata.boot.steps.RemoveClientIdAspectStep; import com.linkedin.metadata.boot.steps.RestoreColumnLineageIndices; import com.linkedin.metadata.boot.steps.RestoreDbtSiblingsIndices; @@ -90,21 +85,14 @@ public class BootstrapManagerFactory { @Value("${bootstrap.policies.file}") private Resource _policiesResource; - @Value("${bootstrap.ownershipTypes.file}") - private Resource _ownershipTypesResource; - @Bean(name = "bootstrapManager") @Scope("singleton") @Nonnull protected BootstrapManager createInstance( @Qualifier("systemOperationContext") final OperationContext systemOpContext) { - final IngestRootUserStep 
ingestRootUserStep = new IngestRootUserStep(_entityService); final IngestPoliciesStep ingestPoliciesStep = new IngestPoliciesStep( _entityService, _entitySearchService, _searchDocumentTransformer, _policiesResource); - final IngestRolesStep ingestRolesStep = new IngestRolesStep(_entityService, _entityRegistry); - final IngestDataPlatformsStep ingestDataPlatformsStep = - new IngestDataPlatformsStep(_entityService); final IngestDataPlatformInstancesStep ingestDataPlatformInstancesStep = new IngestDataPlatformInstancesStep(_entityService, _migrationsDao); final RestoreGlossaryIndices restoreGlossaryIndicesStep = @@ -121,29 +109,21 @@ protected BootstrapManager createInstance( new IngestDefaultGlobalSettingsStep(_entityService); final WaitForSystemUpdateStep waitForSystemUpdateStep = new WaitForSystemUpdateStep(_dataHubUpgradeKafkaListener, _configurationProvider); - final IngestOwnershipTypesStep ingestOwnershipTypesStep = - new IngestOwnershipTypesStep(_entityService, _ownershipTypesResource); - final IngestDataTypesStep ingestDataTypesStep = new IngestDataTypesStep(_entityService); final IngestEntityTypesStep ingestEntityTypesStep = new IngestEntityTypesStep(_entityService); final List finalSteps = new ArrayList<>( ImmutableList.of( waitForSystemUpdateStep, - ingestRootUserStep, ingestPoliciesStep, - ingestRolesStep, - ingestDataPlatformsStep, ingestDataPlatformInstancesStep, _ingestRetentionPoliciesStep, - ingestOwnershipTypesStep, ingestSettingsStep, restoreGlossaryIndicesStep, removeClientIdAspectStep, restoreDbtSiblingsIndices, indexDataPlatformsStep, restoreColumnLineageIndices, - ingestDataTypesStep, ingestEntityTypesStep)); return new BootstrapManager(finalSteps); diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformsStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformsStep.java deleted file mode 100644 index f88343b6db322b..00000000000000 --- 
a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformsStep.java +++ /dev/null @@ -1,117 +0,0 @@ -package com.linkedin.metadata.boot.steps; - -import static com.linkedin.metadata.Constants.*; - -import com.datahub.util.RecordUtils; -import com.fasterxml.jackson.core.StreamReadConstraints; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.linkedin.common.AuditStamp; -import com.linkedin.common.urn.Urn; -import com.linkedin.dataplatform.DataPlatformInfo; -import com.linkedin.metadata.Constants; -import com.linkedin.metadata.boot.BootstrapStep; -import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; -import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; -import io.datahubproject.metadata.context.OperationContext; -import java.io.IOException; -import java.net.URISyntaxException; -import java.util.List; -import java.util.Spliterator; -import java.util.Spliterators; -import java.util.stream.Collectors; -import java.util.stream.StreamSupport; -import javax.annotation.Nonnull; -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; -import org.springframework.core.io.ClassPathResource; - -@Slf4j -@RequiredArgsConstructor -public class IngestDataPlatformsStep implements BootstrapStep { - - private static final String PLATFORM_ASPECT_NAME = "dataPlatformInfo"; - - private final EntityService _entityService; - - @Override - public String name() { - return "IngestDataPlatformsStep"; - } - - @Override - public void execute(@Nonnull OperationContext systemOperationContext) - throws IOException, URISyntaxException { - - final ObjectMapper mapper = new ObjectMapper(); - int maxSize = - Integer.parseInt( - System.getenv() - .getOrDefault(INGESTION_MAX_SERIALIZED_STRING_LENGTH, MAX_JACKSON_STRING_SIZE)); - mapper - .getFactory() - 
.setStreamReadConstraints(StreamReadConstraints.builder().maxStringLength(maxSize).build()); - - // 1. Read from the file into JSON. - final JsonNode dataPlatforms = - mapper.readTree(new ClassPathResource("./boot/data_platforms.json").getFile()); - - if (!dataPlatforms.isArray()) { - throw new RuntimeException( - String.format( - "Found malformed data platforms file, expected an Array but found %s", - dataPlatforms.getNodeType())); - } - - // 2. For each JSON object, cast into a DataPlatformSnapshot object. - List dataPlatformAspects = - StreamSupport.stream( - Spliterators.spliteratorUnknownSize(dataPlatforms.iterator(), Spliterator.ORDERED), - false) - .map( - dataPlatform -> { - final String urnString; - final Urn urn; - try { - urnString = dataPlatform.get("urn").asText(); - urn = Urn.createFromString(urnString); - } catch (URISyntaxException e) { - log.error("Malformed urn: {}", dataPlatform.get("urn").asText()); - throw new RuntimeException("Malformed urn", e); - } - - final DataPlatformInfo info = - RecordUtils.toRecordTemplate( - DataPlatformInfo.class, dataPlatform.get("aspect").toString()); - - try { - return ChangeItemImpl.builder() - .urn(urn) - .aspectName(PLATFORM_ASPECT_NAME) - .recordTemplate(info) - .auditStamp( - new AuditStamp() - .setActor(Urn.createFromString(Constants.SYSTEM_ACTOR)) - .setTime(System.currentTimeMillis())) - .build( - systemOperationContext - .getRetrieverContext() - .get() - .getAspectRetriever()); - } catch (URISyntaxException e) { - throw new RuntimeException(e); - } - }) - .collect(Collectors.toList()); - - _entityService.ingestAspects( - systemOperationContext, - AspectsBatchImpl.builder() - .retrieverContext(systemOperationContext.getRetrieverContext().get()) - .items(dataPlatformAspects) - .build(), - true, - false); - } -} diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataTypesStep.java 
b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataTypesStep.java deleted file mode 100644 index 1ac3aeb2daed0e..00000000000000 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataTypesStep.java +++ /dev/null @@ -1,108 +0,0 @@ -package com.linkedin.metadata.boot.steps; - -import static com.linkedin.metadata.Constants.*; - -import com.datahub.util.RecordUtils; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.linkedin.common.AuditStamp; -import com.linkedin.common.urn.Urn; -import com.linkedin.datatype.DataTypeInfo; -import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.boot.BootstrapStep; -import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.utils.GenericRecordUtils; -import com.linkedin.mxe.MetadataChangeProposal; -import io.datahubproject.metadata.context.OperationContext; -import java.util.HashMap; -import java.util.Map; -import java.util.Objects; -import java.util.Set; -import javax.annotation.Nonnull; -import lombok.extern.slf4j.Slf4j; -import org.springframework.core.io.ClassPathResource; - -/** This bootstrap step is responsible for ingesting default data types. 
*/ -@Slf4j -public class IngestDataTypesStep implements BootstrapStep { - - private static final String DEFAULT_FILE_PATH = "./boot/data_types.json"; - private static final ObjectMapper JSON_MAPPER = new ObjectMapper(); - private final EntityService _entityService; - private final String _resourcePath; - - public IngestDataTypesStep(@Nonnull final EntityService entityService) { - this(entityService, DEFAULT_FILE_PATH); - } - - public IngestDataTypesStep( - @Nonnull final EntityService entityService, @Nonnull final String filePath) { - _entityService = Objects.requireNonNull(entityService, "entityService must not be null"); - _resourcePath = filePath; - } - - @Override - public String name() { - return "IngestDataTypesStep"; - } - - @Override - public void execute(@Nonnull OperationContext systemOperationContext) throws Exception { - log.info("Ingesting default data types..."); - - // 1. Read from the file into JSON. - final JsonNode dataTypesObj = - JSON_MAPPER.readTree(new ClassPathResource(_resourcePath).getFile()); - - if (!dataTypesObj.isArray()) { - throw new RuntimeException( - String.format( - "Found malformed data types file, expected an Array but found %s", - dataTypesObj.getNodeType())); - } - - log.info("Ingesting {} data types types", dataTypesObj.size()); - int numIngested = 0; - - Map urnDataTypesMap = new HashMap<>(); - for (final JsonNode roleObj : dataTypesObj) { - final Urn urn = Urn.createFromString(roleObj.get("urn").asText()); - urnDataTypesMap.put(urn, roleObj); - } - - Set existingUrns = _entityService.exists(systemOperationContext, urnDataTypesMap.keySet()); - - for (final Map.Entry entry : urnDataTypesMap.entrySet()) { - if (!existingUrns.contains(entry.getKey())) { - final DataTypeInfo info = - RecordUtils.toRecordTemplate( - DataTypeInfo.class, entry.getValue().get("info").toString()); - log.info(String.format("Ingesting default data type with urn %s", entry.getKey())); - ingestDataType(systemOperationContext, entry.getKey(), info); - 
numIngested++; - } - } - log.info("Ingested {} new data types", numIngested); - } - - private void ingestDataType( - @Nonnull OperationContext systemOperationContext, - final Urn dataTypeUrn, - final DataTypeInfo info) - throws Exception { - final MetadataChangeProposal proposal = new MetadataChangeProposal(); - proposal.setEntityUrn(dataTypeUrn); - proposal.setEntityType(DATA_TYPE_ENTITY_NAME); - proposal.setAspectName(DATA_TYPE_INFO_ASPECT_NAME); - proposal.setAspect(GenericRecordUtils.serializeAspect(info)); - proposal.setChangeType(ChangeType.UPSERT); - - _entityService.ingestProposal( - systemOperationContext, - proposal, - new AuditStamp() - .setActor(Urn.createFromString(SYSTEM_ACTOR)) - .setTime(System.currentTimeMillis()), - false); - } -} diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestOwnershipTypesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestOwnershipTypesStep.java deleted file mode 100644 index 4488849f34ca91..00000000000000 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestOwnershipTypesStep.java +++ /dev/null @@ -1,117 +0,0 @@ -package com.linkedin.metadata.boot.steps; - -import static com.linkedin.metadata.Constants.*; - -import com.datahub.util.RecordUtils; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.linkedin.common.AuditStamp; -import com.linkedin.common.urn.Urn; -import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.Constants; -import com.linkedin.metadata.boot.BootstrapStep; -import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; -import com.linkedin.metadata.models.AspectSpec; -import com.linkedin.metadata.utils.EntityKeyUtils; -import com.linkedin.metadata.utils.GenericRecordUtils; -import com.linkedin.mxe.GenericAspect; -import 
com.linkedin.mxe.MetadataChangeProposal; -import com.linkedin.ownership.OwnershipTypeInfo; -import io.datahubproject.metadata.context.OperationContext; -import java.util.List; -import javax.annotation.Nonnull; -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; -import org.springframework.core.io.Resource; - -/** - * This bootstrap step is responsible for ingesting default ownership types. - * - *

If system has never bootstrapped this step will: For each ownership type defined in the yaml - * file, it checks whether the urn exists. If not, it ingests the ownership type into DataHub. - */ -@Slf4j -@RequiredArgsConstructor -public class IngestOwnershipTypesStep implements BootstrapStep { - - private static final ObjectMapper JSON_MAPPER = new ObjectMapper(); - private final EntityService _entityService; - private final Resource _ownershipTypesResource; - - @Override - public String name() { - return "IngestOwnershipTypesStep"; - } - - @Override - public void execute(@Nonnull OperationContext systemOperationContext) throws Exception { - log.info("Ingesting default ownership types from {}...", _ownershipTypesResource); - - // 1. Read from the file into JSON. - final JsonNode ownershipTypesObj = JSON_MAPPER.readTree(_ownershipTypesResource.getFile()); - - if (!ownershipTypesObj.isArray()) { - throw new RuntimeException( - String.format( - "Found malformed ownership file, expected an Array but found %s", - ownershipTypesObj.getNodeType())); - } - - final AuditStamp auditStamp = - new AuditStamp() - .setActor(Urn.createFromString(Constants.SYSTEM_ACTOR)) - .setTime(System.currentTimeMillis()); - - log.info("Ingesting {} ownership types", ownershipTypesObj.size()); - int numIngested = 0; - for (final JsonNode roleObj : ownershipTypesObj) { - final Urn urn = Urn.createFromString(roleObj.get("urn").asText()); - final OwnershipTypeInfo info = - RecordUtils.toRecordTemplate(OwnershipTypeInfo.class, roleObj.get("info").toString()); - log.info(String.format("Ingesting default ownership type with urn %s", urn)); - ingestOwnershipType(systemOperationContext, urn, info, auditStamp); - numIngested++; - } - log.info("Ingested {} new ownership types", numIngested); - } - - private void ingestOwnershipType( - @Nonnull OperationContext systemOperationContext, - final Urn ownershipTypeUrn, - final OwnershipTypeInfo info, - final AuditStamp auditStamp) { - - // 3. 
Write key & aspect MCPs. - final MetadataChangeProposal keyAspectProposal = new MetadataChangeProposal(); - final AspectSpec keyAspectSpec = - systemOperationContext.getEntityRegistryContext().getKeyAspectSpec(ownershipTypeUrn); - GenericAspect aspect = - GenericRecordUtils.serializeAspect( - EntityKeyUtils.convertUrnToEntityKey(ownershipTypeUrn, keyAspectSpec)); - keyAspectProposal.setAspect(aspect); - keyAspectProposal.setAspectName(keyAspectSpec.getName()); - keyAspectProposal.setEntityType(OWNERSHIP_TYPE_ENTITY_NAME); - keyAspectProposal.setChangeType(ChangeType.UPSERT); - keyAspectProposal.setEntityUrn(ownershipTypeUrn); - - final MetadataChangeProposal proposal = new MetadataChangeProposal(); - proposal.setEntityUrn(ownershipTypeUrn); - proposal.setEntityType(OWNERSHIP_TYPE_ENTITY_NAME); - proposal.setAspectName(OWNERSHIP_TYPE_INFO_ASPECT_NAME); - info.setCreated(auditStamp); - info.setLastModified(auditStamp); - proposal.setAspect(GenericRecordUtils.serializeAspect(info)); - proposal.setChangeType(ChangeType.UPSERT); - - _entityService.ingestProposal( - systemOperationContext, - AspectsBatchImpl.builder() - .mcps( - List.of(keyAspectProposal, proposal), - auditStamp, - systemOperationContext.getRetrieverContext().get()) - .build(), - false); - } -} diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestRolesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestRolesStep.java deleted file mode 100644 index 449336268e34f2..00000000000000 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestRolesStep.java +++ /dev/null @@ -1,154 +0,0 @@ -package com.linkedin.metadata.boot.steps; - -import static com.linkedin.metadata.Constants.*; - -import com.datahub.util.RecordUtils; -import com.fasterxml.jackson.core.StreamReadConstraints; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import 
com.linkedin.common.AuditStamp; -import com.linkedin.common.urn.Urn; -import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.Constants; -import com.linkedin.metadata.boot.BootstrapStep; -import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; -import com.linkedin.metadata.models.AspectSpec; -import com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.metadata.utils.EntityKeyUtils; -import com.linkedin.metadata.utils.GenericRecordUtils; -import com.linkedin.mxe.GenericAspect; -import com.linkedin.mxe.MetadataChangeProposal; -import com.linkedin.policy.DataHubRoleInfo; -import io.datahubproject.metadata.context.OperationContext; -import jakarta.annotation.Nonnull; -import java.net.URISyntaxException; -import java.util.List; -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; -import org.springframework.core.io.ClassPathResource; - -@Slf4j -@RequiredArgsConstructor -public class IngestRolesStep implements BootstrapStep { - private static final int SLEEP_SECONDS = 60; - private final EntityService _entityService; - private final EntityRegistry _entityRegistry; - - @Override - public String name() { - return this.getClass().getSimpleName(); - } - - @Nonnull - @Override - public ExecutionMode getExecutionMode() { - return ExecutionMode.ASYNC; - } - - @Override - public void execute(@Nonnull OperationContext systemOperationContext) throws Exception { - final ObjectMapper mapper = new ObjectMapper(); - int maxSize = - Integer.parseInt( - System.getenv() - .getOrDefault(INGESTION_MAX_SERIALIZED_STRING_LENGTH, MAX_JACKSON_STRING_SIZE)); - mapper - .getFactory() - .setStreamReadConstraints(StreamReadConstraints.builder().maxStringLength(maxSize).build()); - - // Sleep to ensure deployment process finishes. - Thread.sleep(SLEEP_SECONDS * 1000); - - // 0. 
Execute preflight check to see whether we need to ingest Roles - log.info("Ingesting default Roles..."); - - // 1. Read from the file into JSON. - final JsonNode rolesObj = mapper.readTree(new ClassPathResource("./boot/roles.json").getFile()); - - if (!rolesObj.isArray()) { - throw new RuntimeException( - String.format( - "Found malformed roles file, expected an Array but found %s", - rolesObj.getNodeType())); - } - - final AspectSpec roleInfoAspectSpec = - _entityRegistry - .getEntitySpec(DATAHUB_ROLE_ENTITY_NAME) - .getAspectSpec(DATAHUB_ROLE_INFO_ASPECT_NAME); - final AuditStamp auditStamp = - new AuditStamp() - .setActor(Urn.createFromString(Constants.SYSTEM_ACTOR)) - .setTime(System.currentTimeMillis()); - - for (final JsonNode roleObj : rolesObj) { - final Urn urn = Urn.createFromString(roleObj.get("urn").asText()); - - // If the info is not there, it means that the role was there before, but must now be removed - if (!roleObj.has("info")) { - _entityService.deleteUrn(systemOperationContext, urn); - continue; - } - - final DataHubRoleInfo info = - RecordUtils.toRecordTemplate(DataHubRoleInfo.class, roleObj.get("info").toString()); - ingestRole(systemOperationContext, urn, info, auditStamp, roleInfoAspectSpec); - } - - log.info("Successfully ingested default Roles."); - } - - private void ingestRole( - @Nonnull OperationContext systemOperationContext, - final Urn roleUrn, - final DataHubRoleInfo dataHubRoleInfo, - final AuditStamp auditStamp, - final AspectSpec roleInfoAspectSpec) - throws URISyntaxException { - // 3. 
Write key & aspect - final MetadataChangeProposal keyAspectProposal = new MetadataChangeProposal(); - final AspectSpec keyAspectSpec = - systemOperationContext.getEntityRegistryContext().getKeyAspectSpec(roleUrn); - GenericAspect aspect = - GenericRecordUtils.serializeAspect( - EntityKeyUtils.convertUrnToEntityKey(roleUrn, keyAspectSpec)); - keyAspectProposal.setAspect(aspect); - keyAspectProposal.setAspectName(keyAspectSpec.getName()); - keyAspectProposal.setEntityType(DATAHUB_ROLE_ENTITY_NAME); - keyAspectProposal.setChangeType(ChangeType.UPSERT); - keyAspectProposal.setEntityUrn(roleUrn); - - final MetadataChangeProposal proposal = new MetadataChangeProposal(); - proposal.setEntityUrn(roleUrn); - proposal.setEntityType(DATAHUB_ROLE_ENTITY_NAME); - proposal.setAspectName(DATAHUB_ROLE_INFO_ASPECT_NAME); - proposal.setAspect(GenericRecordUtils.serializeAspect(dataHubRoleInfo)); - proposal.setChangeType(ChangeType.UPSERT); - - _entityService.ingestProposal( - systemOperationContext, - AspectsBatchImpl.builder() - .mcps( - List.of(keyAspectProposal, proposal), - new AuditStamp() - .setActor(Urn.createFromString(SYSTEM_ACTOR)) - .setTime(System.currentTimeMillis()), - systemOperationContext.getRetrieverContext().get()) - .build(), - false); - - _entityService.alwaysProduceMCLAsync( - systemOperationContext, - roleUrn, - DATAHUB_ROLE_ENTITY_NAME, - DATAHUB_ROLE_INFO_ASPECT_NAME, - roleInfoAspectSpec, - null, - dataHubRoleInfo, - null, - null, - auditStamp, - ChangeType.RESTATE); - } -} diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestRootUserStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestRootUserStep.java deleted file mode 100644 index f4862275d310be..00000000000000 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestRootUserStep.java +++ /dev/null @@ -1,96 +0,0 @@ -package com.linkedin.metadata.boot.steps; - -import static 
com.linkedin.metadata.Constants.*; - -import com.datahub.util.RecordUtils; -import com.fasterxml.jackson.core.StreamReadConstraints; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.linkedin.common.AuditStamp; -import com.linkedin.common.urn.Urn; -import com.linkedin.identity.CorpUserInfo; -import com.linkedin.metadata.boot.BootstrapStep; -import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.key.CorpUserKey; -import com.linkedin.metadata.models.AspectSpec; -import com.linkedin.metadata.models.EntitySpec; -import com.linkedin.metadata.utils.EntityKeyUtils; -import com.linkedin.util.Pair; -import io.datahubproject.metadata.context.OperationContext; -import java.io.IOException; -import java.net.URISyntaxException; -import java.util.List; -import javax.annotation.Nonnull; -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; -import org.springframework.core.io.ClassPathResource; - -@Slf4j -@RequiredArgsConstructor -public class IngestRootUserStep implements BootstrapStep { - - private static final String USER_INFO_ASPECT_NAME = "corpUserInfo"; - - private final EntityService _entityService; - - @Override - public String name() { - return getClass().getSimpleName(); - } - - @Override - public void execute(@Nonnull OperationContext systemOperationContext) - throws IOException, URISyntaxException { - - final ObjectMapper mapper = new ObjectMapper(); - int maxSize = - Integer.parseInt( - System.getenv() - .getOrDefault(INGESTION_MAX_SERIALIZED_STRING_LENGTH, MAX_JACKSON_STRING_SIZE)); - mapper - .getFactory() - .setStreamReadConstraints(StreamReadConstraints.builder().maxStringLength(maxSize).build()); - - // 1. Read from the file into JSON. 
- final JsonNode userObj = - mapper.readTree(new ClassPathResource("./boot/root_user.json").getFile()); - - if (!userObj.isObject()) { - throw new RuntimeException( - String.format( - "Found malformed root user file, expected an Object but found %s", - userObj.getNodeType())); - } - - // 2. Ingest the user info - final Urn urn; - try { - urn = Urn.createFromString(userObj.get("urn").asText()); - } catch (URISyntaxException e) { - log.error("Malformed urn: {}", userObj.get("urn").asText()); - throw new RuntimeException("Malformed urn", e); - } - - final CorpUserInfo info = - RecordUtils.toRecordTemplate(CorpUserInfo.class, userObj.get("info").toString()); - final CorpUserKey key = - (CorpUserKey) - EntityKeyUtils.convertUrnToEntityKey(urn, getUserKeyAspectSpec(systemOperationContext)); - final AuditStamp aspectAuditStamp = - new AuditStamp() - .setActor(Urn.createFromString(SYSTEM_ACTOR)) - .setTime(System.currentTimeMillis()); - - _entityService.ingestAspects( - systemOperationContext, - urn, - List.of(Pair.of(CORP_USER_KEY_ASPECT_NAME, key), Pair.of(USER_INFO_ASPECT_NAME, info)), - aspectAuditStamp, - null); - } - - private AspectSpec getUserKeyAspectSpec(@Nonnull OperationContext opContext) { - final EntitySpec spec = opContext.getEntityRegistry().getEntitySpec(CORP_USER_ENTITY_NAME); - return spec.getKeyAspectSpec(); - } -} diff --git a/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestDataTypesStepTest.java b/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestDataTypesStepTest.java deleted file mode 100644 index 65cffc6b86a5bb..00000000000000 --- a/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestDataTypesStepTest.java +++ /dev/null @@ -1,97 +0,0 @@ -package com.linkedin.metadata.boot.steps; - -import static com.linkedin.metadata.Constants.*; -import static org.mockito.Mockito.*; - -import com.linkedin.common.AuditStamp; -import com.linkedin.common.urn.Urn; -import 
com.linkedin.common.urn.UrnUtils; -import com.linkedin.datatype.DataTypeInfo; -import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.models.registry.ConfigEntityRegistry; -import com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.metadata.utils.GenericRecordUtils; -import com.linkedin.mxe.MetadataChangeProposal; -import io.datahubproject.metadata.context.EntityRegistryContext; -import io.datahubproject.metadata.context.OperationContext; -import java.util.Collection; -import java.util.Set; -import org.jetbrains.annotations.NotNull; -import org.mockito.Mockito; -import org.testng.Assert; -import org.testng.annotations.Test; - -public class IngestDataTypesStepTest { - - private static final Urn TEST_DATA_TYPE_URN = UrnUtils.getUrn("urn:li:dataType:datahub.test"); - - @Test - public void testExecuteValidDataTypesNoExistingDataTypes() throws Exception { - EntityRegistry testEntityRegistry = getTestEntityRegistry(); - final EntityService entityService = mock(EntityService.class); - - final OperationContext mockContext = mock(OperationContext.class); - final EntityRegistryContext entityRegistryContext = mock(EntityRegistryContext.class); - when(mockContext.getEntityRegistryContext()).thenReturn(entityRegistryContext); - when(mockContext.getEntityRegistry()).thenReturn(testEntityRegistry); - when(entityRegistryContext.getKeyAspectSpec(anyString())) - .thenAnswer( - args -> testEntityRegistry.getEntitySpec(args.getArgument(0)).getKeyAspectSpec()); - - final IngestDataTypesStep step = - new IngestDataTypesStep(entityService, "./boot/test_data_types_valid.json"); - - step.execute(mockContext); - - DataTypeInfo expectedResult = new DataTypeInfo(); - expectedResult.setDescription("Test Description"); - expectedResult.setDisplayName("Test Name"); - expectedResult.setQualifiedName("datahub.test"); - - Mockito.verify(entityService, times(1)) - .ingestProposal( - 
any(OperationContext.class), - Mockito.eq(buildUpdateDataTypeProposal(expectedResult)), - Mockito.any(AuditStamp.class), - Mockito.eq(false)); - } - - @Test - public void testExecuteInvalidJson() throws Exception { - final EntityService entityService = mock(EntityService.class); - final OperationContext mockContext = mock(OperationContext.class); - when(mockContext.getEntityRegistry()).thenReturn(mock(EntityRegistry.class)); - - when(entityService.exists(any(OperationContext.class), any(Collection.class))) - .thenAnswer(args -> Set.of()); - - final IngestDataTypesStep step = - new IngestDataTypesStep(entityService, "./boot/test_data_types_invalid.json"); - - Assert.assertThrows(RuntimeException.class, () -> step.execute(mockContext)); - - verify(entityService, times(1)).exists(any(OperationContext.class), any(Collection.class)); - - // Verify no additional interactions - verifyNoMoreInteractions(entityService); - } - - private static MetadataChangeProposal buildUpdateDataTypeProposal(final DataTypeInfo info) { - final MetadataChangeProposal mcp = new MetadataChangeProposal(); - mcp.setEntityUrn(TEST_DATA_TYPE_URN); - mcp.setEntityType(DATA_TYPE_ENTITY_NAME); - mcp.setAspectName(DATA_TYPE_INFO_ASPECT_NAME); - mcp.setChangeType(ChangeType.UPSERT); - mcp.setAspect(GenericRecordUtils.serializeAspect(info)); - return mcp; - } - - @NotNull - private ConfigEntityRegistry getTestEntityRegistry() { - return new ConfigEntityRegistry( - IngestDataPlatformInstancesStepTest.class - .getClassLoader() - .getResourceAsStream("test-entity-registry.yaml")); - } -} diff --git a/metadata-service/factories/src/test/resources/boot/test_data_types_invalid.json b/metadata-service/factories/src/test/resources/boot/test_data_types_invalid.json deleted file mode 100644 index ed1d8a7b45abe0..00000000000000 --- a/metadata-service/factories/src/test/resources/boot/test_data_types_invalid.json +++ /dev/null @@ -1,9 +0,0 @@ -[ - { - "urn": "urn:li:dataType:datahub.test", - "badField": { - 
"qualifiedName":"datahub.test", - "description": "Test Description" - } - } -] \ No newline at end of file diff --git a/metadata-service/factories/src/test/resources/boot/test_data_types_valid.json b/metadata-service/factories/src/test/resources/boot/test_data_types_valid.json deleted file mode 100644 index 3694c92947aa18..00000000000000 --- a/metadata-service/factories/src/test/resources/boot/test_data_types_valid.json +++ /dev/null @@ -1,10 +0,0 @@ -[ - { - "urn": "urn:li:dataType:datahub.test", - "info": { - "qualifiedName":"datahub.test", - "displayName": "Test Name", - "description": "Test Description" - } - } -] \ No newline at end of file diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json index e8cc193f3458d0..061feafac1b9b9 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json @@ -56,13 +56,14 @@ "type" : "enum", "name" : "Condition", "doc" : "The matching condition in a filter criterion", - "symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ], + "symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IEQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ], "symbolDocs" : { "ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN", "CONTAIN" : "Represent the relation: String field contains value, e.g. 
name contains Profile", "DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN", "END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event", "EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs", + "IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs", "EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)", "GREATER_THAN" : "Represent the relation greater than, e.g. ownerCount > 5", "GREATER_THAN_OR_EQUAL_TO" : "Represent the relation greater than or equal to, e.g. ownerCount >= 5", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json index bc4d222e316b0e..30d0e9a09cdf41 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json @@ -162,13 +162,14 @@ "type" : "enum", "name" : "Condition", "doc" : "The matching condition in a filter criterion", - "symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ], + "symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IEQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ], "symbolDocs" : { "ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN", "CONTAIN" : "Represent the relation: String field contains value, e.g. 
name contains Profile", "DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN", "END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event", "EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs", + "IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs", "EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)", "GREATER_THAN" : "Represent the relation greater than, e.g. ownerCount > 5", "GREATER_THAN_OR_EQUAL_TO" : "Represent the relation greater than or equal to, e.g. ownerCount >= 5", @@ -842,7 +843,7 @@ "name" : "FabricType", "namespace" : "com.linkedin.common", "doc" : "Fabric group type", - "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP", "RVW" ], + "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP", "RVW", "SANDBOX" ], "symbolDocs" : { "CORP" : "Designates corporation fabrics", "DEV" : "Designates development fabrics", @@ -852,6 +853,7 @@ "PROD" : "Designates production fabrics", "QA" : "Designates quality assurance fabrics", "RVW" : "Designates review fabrics", + "SANDBOX" : "Designates sandbox fabrics", "STG" : "Designates staging fabrics", "TEST" : "Designates testing fabrics", "UAT" : "Designates user acceptance testing fabrics" diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index 8ff0aa930770cd..8cf02a768ecae2 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -834,7 +834,7 @@ "name" : "FabricType", "namespace" : "com.linkedin.common", "doc" : "Fabric group type", - "symbols" : [ 
"DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP", "RVW" ], + "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP", "RVW", "SANDBOX" ], "symbolDocs" : { "CORP" : "Designates corporation fabrics", "DEV" : "Designates development fabrics", @@ -844,6 +844,7 @@ "PROD" : "Designates production fabrics", "QA" : "Designates quality assurance fabrics", "RVW" : "Designates review fabrics", + "SANDBOX" : "Designates sandbox fabrics", "STG" : "Designates staging fabrics", "TEST" : "Designates testing fabrics", "UAT" : "Designates user acceptance testing fabrics" @@ -4542,11 +4543,7 @@ "name" : "description", "type" : "string", "doc" : "Documentation of the MLPrimaryKey", - "optional" : true, - "Searchable" : { - "fieldType" : "TEXT", - "hasValuesFieldName" : "hasDescription" - } + "optional" : true }, { "name" : "dataType", "type" : "com.linkedin.common.MLFeatureDataType", @@ -6057,13 +6054,14 @@ "name" : "Condition", "namespace" : "com.linkedin.metadata.query.filter", "doc" : "The matching condition in a filter criterion", - "symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ], + "symbols" : [ "CONTAIN", "END_WITH", "EQUAL","IEQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ], "symbolDocs" : { "ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN", "CONTAIN" : "Represent the relation: String field contains value, e.g. name contains Profile", "DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN", "END_WITH" : "Represent the relation: String field ends with value, e.g. 
name ends with Event", "EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs", + "IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs", "EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)", "GREATER_THAN" : "Represent the relation greater than, e.g. ownerCount > 5", "GREATER_THAN_OR_EQUAL_TO" : "Represent the relation greater than or equal to, e.g. ownerCount >= 5", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json index 16549757a961fa..d06f3b737a3e17 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json @@ -576,7 +576,7 @@ "name" : "FabricType", "namespace" : "com.linkedin.common", "doc" : "Fabric group type", - "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP", "RVW" ], + "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP", "RVW", "SANDBOX" ], "symbolDocs" : { "CORP" : "Designates corporation fabrics", "DEV" : "Designates development fabrics", @@ -586,6 +586,7 @@ "PROD" : "Designates production fabrics", "QA" : "Designates quality assurance fabrics", "RVW" : "Designates review fabrics", + "SANDBOX" : "Designates sandbox fabrics", "STG" : "Designates staging fabrics", "TEST" : "Designates testing fabrics", "UAT" : "Designates user acceptance testing fabrics" diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json index 95df1d2ce21d93..56562ff49ff8d9 100644 --- 
a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json @@ -576,7 +576,7 @@ "name" : "FabricType", "namespace" : "com.linkedin.common", "doc" : "Fabric group type", - "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP", "RVW" ], + "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP", "RVW", "SANDBOX" ], "symbolDocs" : { "CORP" : "Designates corporation fabrics", "DEV" : "Designates development fabrics", @@ -586,6 +586,7 @@ "PROD" : "Designates production fabrics", "QA" : "Designates quality assurance fabrics", "RVW" : "Designates review fabrics", + "SANDBOX" : "Designates sandbox fabrics", "STG" : "Designates staging fabrics", "TEST" : "Designates testing fabrics", "UAT" : "Designates user acceptance testing fabrics" diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json index 226279e1762297..b90543745c65f4 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json @@ -834,7 +834,7 @@ "name" : "FabricType", "namespace" : "com.linkedin.common", "doc" : "Fabric group type", - "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP", "RVW" ], + "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP", "RVW", "SANDBOX" ], "symbolDocs" : { "CORP" : "Designates corporation fabrics", "DEV" : "Designates development fabrics", @@ -844,6 +844,7 @@ "PROD" : "Designates production fabrics", "QA" : "Designates quality assurance fabrics", "RVW" : "Designates review fabrics", + "SANDBOX" : "Designates sandbox fabrics", "STG" : 
"Designates staging fabrics", "TEST" : "Designates testing fabrics", "UAT" : "Designates user acceptance testing fabrics" @@ -4536,11 +4537,7 @@ "name" : "description", "type" : "string", "doc" : "Documentation of the MLPrimaryKey", - "optional" : true, - "Searchable" : { - "fieldType" : "TEXT", - "hasValuesFieldName" : "hasDescription" - } + "optional" : true }, { "name" : "dataType", "type" : "com.linkedin.common.MLFeatureDataType", diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/service/DataProductService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/service/DataProductService.java index 3abd663832f4b1..f222c31d0876d8 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/service/DataProductService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/service/DataProductService.java @@ -340,12 +340,6 @@ public void batchSetDataProduct( .filter(urn -> !existingResourceUrns.contains(urn)) .collect(Collectors.toList()); - // unset existing data product on resources first as we only allow one data product on an - // entity at a time - for (Urn resourceUrn : resourceUrns) { - unsetDataProduct(opContext, resourceUrn, actorUrn); - } - AuditStamp nowAuditStamp = new AuditStamp().setTime(System.currentTimeMillis()).setActor(actorUrn); for (Urn resourceUrn : newResourceUrns) { @@ -390,7 +384,7 @@ public void unsetDataProduct( 10, // should never be more than 1 as long as we only allow one actorUrn.toString()); - if (relationships.hasRelationships() && relationships.getRelationships().size() > 0) { + if (relationships.hasRelationships() && !relationships.getRelationships().isEmpty()) { relationships .getRelationships() .forEach( diff --git a/metadata-service/war/src/main/resources/boot/data_platforms.json b/metadata-service/war/src/main/resources/boot/data_platforms.json deleted file mode 100644 index 03f1cf8e6c934e..00000000000000 --- 
a/metadata-service/war/src/main/resources/boot/data_platforms.json +++ /dev/null @@ -1,708 +0,0 @@ -[ - { - "urn": "urn:li:dataPlatform:adlsGen1", - "aspect": { - "datasetNameDelimiter": "/", - "name": "adlsGen1", - "displayName": "Azure Data Lake (Gen 1)", - "type": "FILE_SYSTEM", - "logoUrl": "/assets/platforms/adlslogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:adlsGen2", - "aspect": { - "datasetNameDelimiter": "/", - "name": "adlsGen2", - "displayName": "Azure Data Lake (Gen 2)", - "type": "FILE_SYSTEM", - "logoUrl": "/assets/platforms/adlslogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:airflow", - "aspect": { - "datasetNameDelimiter": ".", - "name": "airflow", - "displayName": "Airflow", - "type": "OTHERS", - "logoUrl": "/assets/platforms/airflowlogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:ambry", - "aspect": { - "datasetNameDelimiter": ".", - "name": "ambry", - "displayName": "Ambry", - "type": "OBJECT_STORE" - } - }, - { - "urn": "urn:li:dataPlatform:clickhouse", - "aspect": { - "datasetNameDelimiter": ".", - "name": "clickhouse", - "displayName": "ClickHouse", - "type": "RELATIONAL_DB", - "logoUrl": "/assets/platforms/clickhouselogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:cockroachdb", - "aspect": { - "datasetNameDelimiter": ".", - "name": "cockroachdb", - "displayName": "CockroachDb", - "type": "RELATIONAL_DB", - "logoUrl": "/assets/platforms/cockroachdblogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:couchbase", - "aspect": { - "datasetNameDelimiter": ".", - "name": "couchbase", - "displayName": "Couchbase", - "type": "KEY_VALUE_STORE", - "logoUrl": "/assets/platforms/couchbaselogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:dagster", - "aspect": { - "datasetNameDelimiter": "/", - "name": "dagster", - "displayName": "Dagster", - "type": "OTHERS", - "logoUrl": "/assets/platforms/dagsterlogo.svg" - } - }, - { - "urn": "urn:li:dataPlatform:external", - "aspect": { - "datasetNameDelimiter": ".", - "name": "external", - 
"displayName": "External Source", - "type": "OTHERS" - } - }, - { - "urn": "urn:li:dataPlatform:hdfs", - "aspect": { - "datasetNameDelimiter": "/", - "name": "hdfs", - "displayName": "HDFS", - "type": "FILE_SYSTEM", - "logoUrl": "/assets/platforms/hadooplogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:hana", - "aspect": { - "datasetNameDelimiter": ".", - "name": "hana", - "displayName": "SAP HANA", - "type": "RELATIONAL_DB", - "logoUrl": "/assets/platforms/hanalogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:hive", - "aspect": { - "datasetNameDelimiter": ".", - "name": "hive", - "displayName": "Hive", - "type": "FILE_SYSTEM", - "logoUrl": "/assets/platforms/hivelogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:iceberg", - "aspect": { - "datasetNameDelimiter": ".", - "name": "iceberg", - "displayName": "Iceberg", - "type": "FILE_SYSTEM", - "logoUrl": "/assets/platforms/iceberglogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:s3", - "aspect": { - "datasetNameDelimiter": "/", - "name": "s3", - "displayName": "AWS S3", - "type": "FILE_SYSTEM", - "logoUrl": "/assets/platforms/s3.png" - } - }, - { - "urn": "urn:li:dataPlatform:kafka", - "aspect": { - "datasetNameDelimiter": ".", - "name": "kafka", - "displayName": "Kafka", - "type": "MESSAGE_BROKER", - "logoUrl": "/assets/platforms/kafkalogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:kafka-connect", - "aspect": { - "datasetNameDelimiter": ".", - "name": "kafka-connect", - "displayName": "Kafka Connect", - "type": "OTHERS", - "logoUrl": "/assets/platforms/kafkalogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:kusto", - "aspect": { - "datasetNameDelimiter": ".", - "name": "kusto", - "displayName": "Kusto", - "type": "OLAP_DATASTORE", - "logoUrl": "/assets/platforms/kustologo.png" - } - }, - { - "urn": "urn:li:dataPlatform:mode", - "aspect": { - "datasetNameDelimiter": ".", - "name": "mode", - "displayName": "Mode", - "type": "KEY_VALUE_STORE", - "logoUrl": "/assets/platforms/modelogo.png" - } - }, - { 
- "urn": "urn:li:dataPlatform:mongodb", - "aspect": { - "datasetNameDelimiter": ".", - "name": "mongodb", - "displayName": "MongoDB", - "type": "KEY_VALUE_STORE", - "logoUrl": "/assets/platforms/mongodblogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:mysql", - "aspect": { - "datasetNameDelimiter": ".", - "name": "mysql", - "displayName": "MySQL", - "type": "RELATIONAL_DB", - "logoUrl": "/assets/platforms/mysqllogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:db2", - "aspect": { - "datasetNameDelimiter": ".", - "name": "db2", - "displayName": "DB2", - "type": "RELATIONAL_DB", - "logoUrl": "/assets/platforms/db2logo.png" - } - }, - { - "urn": "urn:li:dataPlatform:mariadb", - "aspect": { - "datasetNameDelimiter": ".", - "name": "mariadb", - "displayName": "MariaDB", - "type": "RELATIONAL_DB", - "logoUrl": "/assets/platforms/mariadblogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:OpenApi", - "aspect": { - "datasetNameDelimiter": ".", - "name": "openapi", - "displayName": "OpenAPI", - "type": "OTHERS", - "logoUrl": "/assets/platforms/openapilogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:oracle", - "aspect": { - "datasetNameDelimiter": ".", - "name": "oracle", - "displayName": "Oracle", - "type": "RELATIONAL_DB", - "logoUrl": "/assets/platforms/oraclelogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:pinot", - "aspect": { - "datasetNameDelimiter": ".", - "name": "pinot", - "displayName": "Pinot", - "type": "OLAP_DATASTORE", - "logoUrl": "/assets/platforms/pinotlogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:postgres", - "aspect": { - "datasetNameDelimiter": ".", - "name": "postgres", - "displayName": "PostgreSQL", - "type": "RELATIONAL_DB", - "logoUrl": "/assets/platforms/postgreslogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:prefect", - "aspect": { - "datasetNameDelimiter": ".", - "name": "prefect", - "displayName": "Prefect", - "type": "OTHERS", - "logoUrl": "/assets/platforms/prefectlogo.png" - } - }, - { - "urn": 
"urn:li:dataPlatform:presto", - "aspect": { - "datasetNameDelimiter": ".", - "name": "prefect", - "displayName": "Prefect", - "type": "OTHERS", - "logoUrl": "/assets/platforms/prefectlogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:presto", - "aspect": { - "datasetNameDelimiter": ".", - "name": "presto", - "displayName": "Presto", - "type": "QUERY_ENGINE", - "logoUrl": "/assets/platforms/prestologo.png" - } - }, - { - "urn": "urn:li:dataPlatform:tableau", - "aspect": { - "datasetNameDelimiter": ".", - "name": "tableau", - "displayName": "Tableau", - "type": "OTHERS", - "logoUrl": "/assets/platforms/tableaulogo.svg" - } - }, - { - "urn": "urn:li:dataPlatform:teradata", - "aspect": { - "datasetNameDelimiter": ".", - "name": "teradata", - "displayName": "Teradata", - "type": "RELATIONAL_DB", - "logoUrl": "/assets/platforms/teradatalogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:voldemort", - "aspect": { - "datasetNameDelimiter": ".", - "name": "voldemort", - "displayName": "Voldemort", - "type": "KEY_VALUE_STORE" - } - }, - { - "urn": "urn:li:dataPlatform:snowflake", - "aspect": { - "datasetNameDelimiter": ".", - "name": "snowflake", - "displayName": "Snowflake", - "type": "RELATIONAL_DB", - "logoUrl": "/assets/platforms/snowflakelogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:redshift", - "aspect": { - "datasetNameDelimiter": ".", - "name": "redshift", - "displayName": "Redshift", - "type": "RELATIONAL_DB", - "logoUrl": "/assets/platforms/redshiftlogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:mssql", - "aspect": { - "datasetNameDelimiter": ".", - "name": "mssql", - "displayName": "SQL Server", - "type": "RELATIONAL_DB", - "logoUrl": "/assets/platforms/mssqllogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:bigquery", - "aspect": { - "datasetNameDelimiter": ".", - "name": "bigquery", - "displayName": "BigQuery", - "type": "RELATIONAL_DB", - "logoUrl": "/assets/platforms/bigquerylogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:druid", - 
"aspect": { - "datasetNameDelimiter": ".", - "name": "druid", - "displayName": "Druid", - "type": "OLAP_DATASTORE", - "logoUrl": "/assets/platforms/druidlogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:looker", - "aspect": { - "datasetNameDelimiter": ".", - "name": "looker", - "displayName": "Looker", - "type": "OTHERS", - "logoUrl": "/assets/platforms/lookerlogo.svg" - } - }, - { - "urn": "urn:li:dataPlatform:feast", - "aspect": { - "datasetNameDelimiter": ".", - "name": "feast", - "displayName": "Feast", - "type": "OTHERS", - "logoUrl": "/assets/platforms/feastlogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:sagemaker", - "aspect": { - "datasetNameDelimiter": ".", - "name": "sagemaker", - "displayName": "SageMaker", - "type": "OTHERS", - "logoUrl": "/assets/platforms/sagemakerlogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:mlflow", - "aspect": { - "datasetNameDelimiter": ".", - "name": "mlflow", - "displayName": "MLflow", - "type": "OTHERS", - "logoUrl": "/assets/platforms/mlflowlogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:glue", - "aspect": { - "datasetNameDelimiter": ".", - "name": "glue", - "displayName": "Glue", - "type": "OTHERS", - "logoUrl": "/assets/platforms/gluelogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:redash", - "aspect": { - "datasetNameDelimiter": ".", - "name": "redash", - "displayName": "Redash", - "type": "OTHERS", - "logoUrl": "/assets/platforms/redashlogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:athena", - "aspect": { - "datasetNameDelimiter": ".", - "name": "athena", - "displayName": "AWS Athena", - "type": "RELATIONAL_DB", - "logoUrl": "/assets/platforms/awsathenalogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:spark", - "aspect": { - "datasetNameDelimiter": ".", - "name": "spark", - "displayName": "Spark", - "type": "OTHERS", - "logoUrl": "/assets/platforms/sparklogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:dbt", - "aspect": { - "datasetNameDelimiter": ".", - "name": "dbt", - "displayName": 
"dbt", - "type": "OTHERS", - "logoUrl": "/assets/platforms/dbtlogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:elasticsearch", - "aspect": { - "datasetNameDelimiter": ".", - "name": "elasticsearch", - "displayName": "Elasticsearch", - "type": "OTHERS", - "logoUrl": "/assets/platforms/elasticsearchlogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:great-expectations", - "aspect": { - "name": "Great Expectations", - "displayName": "Great Expectations", - "type": "OTHERS", - "logoUrl": "/assets/platforms/greatexpectationslogo.png", - "datasetNameDelimiter": "." - } - }, - { - "urn": "urn:li:dataPlatform:powerbi", - "aspect": { - "datasetNameDelimiter": ".", - "name": "powerbi", - "displayName": "Power BI", - "type": "OTHERS", - "logoUrl": "/assets/platforms/powerbilogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:presto-on-hive", - "aspect": { - "datasetNameDelimiter": ".", - "name": "presto-on-hive", - "displayName": "Presto on Hive", - "type": "FILE_SYSTEM", - "logoUrl": "/assets/platforms/prestoonhivelogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:metabase", - "aspect": { - "datasetNameDelimiter": ".", - "name": "metabase", - "displayName": "Metabase", - "type": "OTHERS", - "logoUrl": "/assets/platforms/metabaselogo.svg" - } - }, - { - "urn": "urn:li:dataPlatform:nifi", - "aspect": { - "datasetNameDelimiter": ".", - "name": "nifi", - "displayName": "NiFi", - "type": "OTHERS", - "logoUrl": "/assets/platforms/nifilogo.svg" - } - }, - { - "urn": "urn:li:dataPlatform:superset", - "aspect": { - "datasetNameDelimiter": ".", - "name": "superset", - "displayName": "Superset", - "type": "OTHERS", - "logoUrl": "/assets/platforms/supersetlogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:trino", - "aspect": { - "datasetNameDelimiter": ".", - "name": "trino", - "displayName": "Trino", - "type": "QUERY_ENGINE", - "logoUrl": "/assets/platforms/trinologo.png" - } - }, - { - "urn": "urn:li:dataPlatform:pulsar", - "aspect": { - "datasetNameDelimiter": ".", - "name": 
"pulsar", - "displayName": "Pulsar", - "type": "MESSAGE_BROKER", - "logoUrl": "/assets/platforms/pulsarlogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:salesforce", - "aspect": { - "datasetNameDelimiter": ".", - "name": "salesforce", - "displayName": "Salesforce", - "type": "OTHERS", - "logoUrl": "/assets/platforms/logo-salesforce.svg" - } - }, - { - "urn": "urn:li:dataPlatform:unknown", - "aspect": { - "datasetNameDelimiter": ".", - "name": "Unknown Platform", - "displayName": "N/A", - "type": "OTHERS" - } - }, - { - "urn": "urn:li:dataPlatform:delta-lake", - "aspect": { - "datasetNameDelimiter": ".", - "name": "delta-lake", - "displayName": "Delta Lake", - "type": "OTHERS", - "logoUrl": "/assets/platforms/deltalakelogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:databricks", - "aspect": { - "datasetNameDelimiter": ".", - "name": "databricks", - "displayName": "Databricks", - "type": "OTHERS", - "logoUrl": "/assets/platforms/databrickslogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:vertica", - "aspect": { - "datasetNameDelimiter": ".", - "name": "vertica", - "displayName": "Vertica", - "type": "OLAP_DATASTORE", - "logoUrl": "/assets/platforms/verticalogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:gcs", - "aspect": { - "datasetNameDelimiter": "/", - "name": "gcs", - "displayName": "Google Cloud Storage", - "type": "FILE_SYSTEM", - "logoUrl": "/assets/platforms/gcslogo.svg" - } - }, - { - "urn": "urn:li:dataPlatform:slack", - "aspect": { - "datasetNameDelimiter": ".", - "name": "Slack", - "displayName": "Slack", - "type": "OTHERS", - "logoUrl": "/assets/platforms/slacklogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:microsoft-teams", - "aspect": { - "datasetNameDelimiter": ".", - "name": "Microsoft Teams", - "displayName": "Microsoft Teams", - "type": "OTHERS", - "logoUrl": "/assets/platforms/teamslogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:dynamodb", - "aspect": { - "datasetNameDelimiter": ".", - "name": "dynamodb", - "displayName": 
"DynamoDB", - "type": "KEY_VALUE_STORE", - "logoUrl": "/assets/platforms/dynamodblogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:fivetran", - "aspect": { - "datasetNameDelimiter": ".", - "name": "fivetran", - "displayName": "Fivetran", - "type": "OTHERS", - "logoUrl": "/assets/platforms/fivetranlogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:csv", - "aspect": { - "datasetNameDelimiter": ".", - "name": "csv", - "displayName": "CSV", - "type": "OTHERS", - "logoUrl": "/assets/platforms/csv-logo.png" - } - }, - { - "urn": "urn:li:dataPlatform:qlik-sense", - "aspect": { - "datasetNameDelimiter": ".", - "name": "qlik-sense", - "displayName": "Qlik Sense", - "type": "OTHERS", - "logoUrl": "/assets/platforms/qliklogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:file", - "aspect": { - "datasetNameDelimiter": ".", - "name": "file", - "displayName": "File", - "type": "OTHERS", - "logoUrl": "/assets/platforms/file-logo.svg" - } - }, - { - "urn": "urn:li:dataPlatform:excel", - "aspect": { - "name": "excel", - "displayName": "Excel", - "type": "OTHERS", - "datasetNameDelimiter": "/", - "logoUrl": "/assets/platforms/excel-logo.svg" - } - }, - { - "urn": "urn:li:dataPlatform:sigma", - "aspect": { - "datasetNameDelimiter": ".", - "name": "sigma", - "displayName": "Sigma", - "type": "OTHERS", - "logoUrl": "/assets/platforms/sigmalogo.png" - } - }, - { - "urn": "urn:li:dataPlatform:sac", - "aspect": { - "datasetNameDelimiter": ".", - "name": "sac", - "displayName": "SAP Analytics Cloud", - "type": "OTHERS", - "logoUrl": "/assets/platforms/saclogo.svg" - } - } -] diff --git a/metadata-service/war/src/main/resources/boot/data_types.json b/metadata-service/war/src/main/resources/boot/data_types.json deleted file mode 100644 index 2d7294e45bd7a5..00000000000000 --- a/metadata-service/war/src/main/resources/boot/data_types.json +++ /dev/null @@ -1,42 +0,0 @@ -[ - { - "urn": "urn:li:dataType:datahub.string", - "info": { - "qualifiedName":"datahub.string", - "displayName": 
"String", - "description": "A string of characters." - } - }, - { - "urn": "urn:li:dataType:datahub.number", - "info": { - "qualifiedName":"datahub.number", - "displayName": "Number", - "description": "An integer or decimal number." - } - }, - { - "urn": "urn:li:dataType:datahub.urn", - "info": { - "qualifiedName":"datahub.urn", - "displayName": "Urn", - "description": "An unique identifier for a DataHub entity." - } - }, - { - "urn": "urn:li:dataType:datahub.rich_text", - "info": { - "qualifiedName":"datahub.rich_text", - "displayName": "Rich Text", - "description": "An attributed string of characters." - } - }, - { - "urn": "urn:li:dataType:datahub.date", - "info": { - "qualifiedName":"datahub.date", - "displayName": "Date", - "description": "A specific day, without time." - } - } -] diff --git a/metadata-service/war/src/main/resources/boot/ownership_types.json b/metadata-service/war/src/main/resources/boot/ownership_types.json deleted file mode 100644 index 79fe5d600a9ce0..00000000000000 --- a/metadata-service/war/src/main/resources/boot/ownership_types.json +++ /dev/null @@ -1,30 +0,0 @@ -[ - { - "urn": "urn:li:ownershipType:__system__technical_owner", - "info": { - "name":"Technical Owner", - "description":"Involved in the production, maintenance, or distribution of the asset(s)." - } - }, - { - "urn": "urn:li:ownershipType:__system__business_owner", - "info": { - "name":"Business Owner", - "description":"Principle stakeholders or domain experts associated with the asset(s)." - } - }, - { - "urn": "urn:li:ownershipType:__system__data_steward", - "info": { - "name":"Data Steward", - "description":"Involved in governance of the asset(s)." - } - }, - { - "urn": "urn:li:ownershipType:__system__none", - "info": { - "name":"None", - "description":"No ownership type specified." 
- } - } -] \ No newline at end of file diff --git a/metadata-service/war/src/main/resources/boot/roles.json b/metadata-service/war/src/main/resources/boot/roles.json deleted file mode 100644 index 629226ef136e26..00000000000000 --- a/metadata-service/war/src/main/resources/boot/roles.json +++ /dev/null @@ -1,26 +0,0 @@ -[ - { - "urn": "urn:li:dataHubRole:Admin", - "info": { - "name":"Admin", - "description":"Can do everything on the platform.", - "editable":false - } - }, - { - "urn": "urn:li:dataHubRole:Editor", - "info": { - "name":"Editor", - "description":"Can read and edit all metadata. Cannot take administrative actions.", - "editable":false - } - }, - { - "urn": "urn:li:dataHubRole:Reader", - "info": { - "name":"Reader", - "description":"Can read all metadata. Cannot edit anything by default, or take administrative actions.", - "editable":false - } - } -] diff --git a/metadata-service/war/src/main/resources/boot/root_user.json b/metadata-service/war/src/main/resources/boot/root_user.json deleted file mode 100644 index 7922a6c7fa5afb..00000000000000 --- a/metadata-service/war/src/main/resources/boot/root_user.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "urn": "urn:li:corpuser:datahub", - "info": { - "active": true, - "displayName": "DataHub", - "title": "DataHub Root User" - } -} \ No newline at end of file diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java index 7a5a34d0f36301..5964bab9465284 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java @@ -59,6 +59,18 @@ public class PoliciesConfig { "Manage Users & Groups", "Create, remove, and update users and groups on DataHub."); + static final Privilege CREATE_USERS_AND_GROUPS_PRIVILEGE = + Privilege.of( + "CREATE_USERS_AND_GROUPS", + "Create Users & 
Groups", + "Create users and groups on DataHub."); + + static final Privilege UPDATE_USERS_AND_GROUPS_PRIVILEGE = + Privilege.of( + "UPDATE_USERS_AND_GROUPS", + "Update Users & Groups", + "Update users and groups on DataHub."); + private static final Privilege VIEW_ANALYTICS_PRIVILEGE = Privilege.of("VIEW_ANALYTICS", "View Analytics", "View the DataHub analytics dashboard."); @@ -177,6 +189,8 @@ public class PoliciesConfig { ImmutableList.of( MANAGE_POLICIES_PRIVILEGE, MANAGE_USERS_AND_GROUPS_PRIVILEGE, + CREATE_USERS_AND_GROUPS_PRIVILEGE, + UPDATE_USERS_AND_GROUPS_PRIVILEGE, VIEW_ANALYTICS_PRIVILEGE, GET_ANALYTICS_PRIVILEGE, MANAGE_DOMAINS_PRIVILEGE, @@ -926,13 +940,15 @@ public class PoliciesConfig { ImmutableMap.>>builder() .put( ApiOperation.CREATE, - Disjunctive.disjoint(MANAGE_USERS_AND_GROUPS_PRIVILEGE)) + Disjunctive.disjoint( + CREATE_USERS_AND_GROUPS_PRIVILEGE, MANAGE_USERS_AND_GROUPS_PRIVILEGE)) .put( ApiOperation.READ, API_PRIVILEGE_MAP.get(ApiGroup.ENTITY).get(ApiOperation.READ)) .put( ApiOperation.UPDATE, - Disjunctive.disjoint(MANAGE_USERS_AND_GROUPS_PRIVILEGE)) + Disjunctive.disjoint( + UPDATE_USERS_AND_GROUPS_PRIVILEGE, MANAGE_USERS_AND_GROUPS_PRIVILEGE)) .put( ApiOperation.DELETE, Disjunctive.disjoint(MANAGE_USERS_AND_GROUPS_PRIVILEGE)) @@ -945,13 +961,15 @@ public class PoliciesConfig { ImmutableMap.>>builder() .put( ApiOperation.CREATE, - Disjunctive.disjoint(MANAGE_USERS_AND_GROUPS_PRIVILEGE)) + Disjunctive.disjoint( + CREATE_USERS_AND_GROUPS_PRIVILEGE, MANAGE_USERS_AND_GROUPS_PRIVILEGE)) .put( ApiOperation.READ, API_PRIVILEGE_MAP.get(ApiGroup.ENTITY).get(ApiOperation.READ)) .put( ApiOperation.UPDATE, - Disjunctive.disjoint(MANAGE_USERS_AND_GROUPS_PRIVILEGE)) + Disjunctive.disjoint( + UPDATE_USERS_AND_GROUPS_PRIVILEGE, MANAGE_USERS_AND_GROUPS_PRIVILEGE)) .put( ApiOperation.DELETE, Disjunctive.disjoint(MANAGE_USERS_AND_GROUPS_PRIVILEGE)) diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/utils/GenericRecordUtils.java 
b/metadata-utils/src/main/java/com/linkedin/metadata/utils/GenericRecordUtils.java index 7974d239a25bc4..6638481c1d2794 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/utils/GenericRecordUtils.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/utils/GenericRecordUtils.java @@ -59,9 +59,13 @@ public static T deserializePayload( @Nonnull public static GenericAspect serializeAspect(@Nonnull RecordTemplate aspect) { + return serializeAspect(RecordUtils.toJsonString(aspect)); + } + + @Nonnull + public static GenericAspect serializeAspect(@Nonnull String str) { GenericAspect genericAspect = new GenericAspect(); - genericAspect.setValue( - ByteString.unsafeWrap(RecordUtils.toJsonString(aspect).getBytes(StandardCharsets.UTF_8))); + genericAspect.setValue(ByteString.unsafeWrap(str.getBytes(StandardCharsets.UTF_8))); genericAspect.setContentType(GenericRecordUtils.JSON); return genericAspect; }