From 6aa6a8f639c5531011a9b2411699b38a0cdef4b4 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Mon, 1 Apr 2024 11:59:04 -0500 Subject: [PATCH 01/17] test(graph): refactor graph test (#10175) --- .../metadata/graph/GraphServiceTestBase.java | 409 +++++++++++------- .../graph/GraphServiceTestBaseNoVia.java | 2 +- .../graph/dgraph/DgraphGraphServiceTest.java | 2 +- .../graph/neo4j/Neo4jGraphServiceTest.java | 104 ++--- .../search/SearchGraphServiceTestBase.java | 86 ++-- 5 files changed, 346 insertions(+), 257 deletions(-) diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java index 1fa89ec99973f..38c9136113dbb 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java @@ -31,8 +31,10 @@ import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.function.Function; import java.util.stream.Collectors; @@ -40,8 +42,6 @@ import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; import org.testng.Assert; import org.testng.annotations.BeforeMethod; @@ -81,37 +81,114 @@ public int compare(RelatedEntity left, RelatedEntity right) { protected static String userType = "user"; /** Some test datasets. 
*/ - protected static String datasetOneUrnString = - "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDatasetOne,PROD)"; - - protected static String datasetTwoUrnString = - "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDatasetTwo,PROD)"; - protected static String datasetThreeUrnString = - "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDatasetThree,PROD)"; - protected static String datasetFourUrnString = - "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDatasetFour,PROD)"; - protected static String datasetFiveUrnString = - "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDatasetFive,PROD)"; - - protected static final String schemaFieldUrnOneString = - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:type,SampleDatasetFive,PROD),fieldOne)"; - protected static final String schemaFieldUrnTwoString = - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:type,SampleDatasetFour,PROD),fieldTwo)"; - - protected static final String lifeCycleOwnerOneString = + protected static String dataset1UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset1,PROD)"; + + protected static String dataset2UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset2,PROD)"; + protected static String dataset3UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset3,PROD)"; + protected static String dataset4UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset4,PROD)"; + protected static String dataset5UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset5,PROD)"; + + protected static String dataset6UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset6,PROD)"; + + protected static String dataset7UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset7,PROD)"; + + protected static String dataset8UrnString = + "urn:li:" + datasetType + 
":(urn:li:dataPlatform:type,SampleDataset8,PROD)"; + protected static String dataset9UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset9,PROD)"; + protected static String dataset10UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset10,PROD)"; + protected static String dataset11UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset11,PROD)"; + protected static String dataset12UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset12,PROD)"; + protected static String dataset13UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset13,PROD)"; + protected static String dataset14UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset14,PROD)"; + protected static String dataset15UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset15,PROD)"; + protected static String dataset16UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset16,PROD)"; + protected static String dataset17UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset17,PROD)"; + protected static String dataset18UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset18,PROD)"; + + protected static String dataset19UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset19,PROD)"; + + protected static String dataset20UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset20,PROD)"; + + protected static Urn dataset1Urn = createFromString(dataset1UrnString); + protected static Urn dataset2Urn = createFromString(dataset2UrnString); + protected static Urn dataset3Urn = createFromString(dataset3UrnString); + protected static Urn dataset4Urn = createFromString(dataset4UrnString); + protected static Urn dataset5Urn = createFromString(dataset5UrnString); + protected static Urn dataset6Urn = 
createFromString(dataset6UrnString); + + protected static Urn dataset7Urn = createFromString(dataset7UrnString); + + protected static Urn dataset8Urn = createFromString(dataset8UrnString); + protected static Urn dataset9Urn = createFromString(dataset9UrnString); + protected static Urn dataset10Urn = createFromString(dataset10UrnString); + protected static Urn dataset11Urn = createFromString(dataset11UrnString); + protected static Urn dataset12Urn = createFromString(dataset12UrnString); + protected static Urn dataset13Urn = createFromString(dataset13UrnString); + protected static Urn dataset14Urn = createFromString(dataset14UrnString); + protected static Urn dataset15Urn = createFromString(dataset15UrnString); + protected static Urn dataset16Urn = createFromString(dataset16UrnString); + protected static Urn dataset17Urn = createFromString(dataset17UrnString); + protected static Urn dataset18Urn = createFromString(dataset18UrnString); + + protected static Urn dataset19Urn = createFromString(dataset19UrnString); + + protected static Urn dataset20Urn = createFromString(dataset20UrnString); + protected static List datasetUrns = + List.of( + dataset1Urn, + dataset2Urn, + dataset3Urn, + dataset4Urn, + dataset5Urn, + dataset6Urn, + dataset7Urn, + dataset8Urn, + dataset9Urn, + dataset10Urn, + dataset11Urn, + dataset12Urn, + dataset13Urn, + dataset14Urn, + dataset15Urn, + dataset16Urn, + dataset17Urn, + dataset18Urn, + dataset19Urn, + dataset20Urn); + + protected static final String schemaFieldUrn1String = + "urn:li:schemaField:(" + dataset5UrnString + ",fieldOne)"; + protected static final String schemaFieldUrn2String = + "urn:li:schemaField:(" + dataset4UrnString + ",fieldTwo)"; + + protected static final String lifeCycleOwner1String = "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)"; - protected static final String lifeCycleOwnerTwoString = + protected static final String lifeCycleOwner2String = 
"urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"; - protected static Urn datasetOneUrn = createFromString(datasetOneUrnString); - protected static Urn datasetTwoUrn = createFromString(datasetTwoUrnString); - protected static Urn datasetThreeUrn = createFromString(datasetThreeUrnString); - protected static Urn datasetFourUrn = createFromString(datasetFourUrnString); - protected static Urn datasetFiveUrn = createFromString(datasetFiveUrnString); - protected static final Urn schemaFieldUrnOne = createFromString(schemaFieldUrnOneString); - protected static final Urn schemaFieldUrnTwo = createFromString(schemaFieldUrnTwoString); - protected static final Urn lifeCycleOwnerOne = createFromString(lifeCycleOwnerOneString); - protected static final Urn lifeCycleOwnerTwo = createFromString(lifeCycleOwnerTwoString); + protected static final Urn schemaFieldUrnOne = createFromString(schemaFieldUrn1String); + protected static final Urn schemaFieldUrnTwo = createFromString(schemaFieldUrn2String); + protected static final Urn lifeCycleOwnerOne = createFromString(lifeCycleOwner1String); + protected static final Urn lifeCycleOwnerTwo = createFromString(lifeCycleOwner2String); protected static String unknownUrnString = "urn:li:unknown:(urn:li:unknown:Unknown)"; @@ -146,31 +223,31 @@ public int compare(RelatedEntity left, RelatedEntity right) { /** Some expected related entities. 
*/ protected static RelatedEntity downstreamOfDatasetOneRelatedEntity = - new RelatedEntity(downstreamOf, datasetOneUrnString); + new RelatedEntity(downstreamOf, dataset1UrnString); protected static RelatedEntity downstreamOfDatasetTwoRelatedEntity = - new RelatedEntity(downstreamOf, datasetTwoUrnString); + new RelatedEntity(downstreamOf, dataset2UrnString); protected static RelatedEntity downstreamOfDatasetThreeRelatedEntity = - new RelatedEntity(downstreamOf, datasetThreeUrnString); + new RelatedEntity(downstreamOf, dataset3UrnString); protected static RelatedEntity downstreamOfDatasetFourRelatedEntity = - new RelatedEntity(downstreamOf, datasetFourUrnString); + new RelatedEntity(downstreamOf, dataset4UrnString); protected static final RelatedEntity downstreamOfSchemaFieldOneVia = - new RelatedEntity(downstreamOf, schemaFieldUrnOneString, lifeCycleOwnerOneString); + new RelatedEntity(downstreamOf, schemaFieldUrn1String, lifeCycleOwner1String); protected static final RelatedEntity downstreamOfSchemaFieldOne = - new RelatedEntity(downstreamOf, schemaFieldUrnOneString); + new RelatedEntity(downstreamOf, schemaFieldUrn1String); protected static final RelatedEntity downstreamOfSchemaFieldTwoVia = - new RelatedEntity(downstreamOf, schemaFieldUrnTwoString, lifeCycleOwnerOneString); + new RelatedEntity(downstreamOf, schemaFieldUrn2String, lifeCycleOwner1String); protected static final RelatedEntity downstreamOfSchemaFieldTwo = - new RelatedEntity(downstreamOf, schemaFieldUrnTwoString); + new RelatedEntity(downstreamOf, schemaFieldUrn2String); protected static RelatedEntity hasOwnerDatasetOneRelatedEntity = - new RelatedEntity(hasOwner, datasetOneUrnString); + new RelatedEntity(hasOwner, dataset1UrnString); protected static RelatedEntity hasOwnerDatasetTwoRelatedEntity = - new RelatedEntity(hasOwner, datasetTwoUrnString); + new RelatedEntity(hasOwner, dataset2UrnString); protected static RelatedEntity hasOwnerDatasetThreeRelatedEntity = - new RelatedEntity(hasOwner, 
datasetThreeUrnString); + new RelatedEntity(hasOwner, dataset3UrnString); protected static RelatedEntity hasOwnerDatasetFourRelatedEntity = - new RelatedEntity(hasOwner, datasetFourUrnString); + new RelatedEntity(hasOwner, dataset4UrnString); protected static RelatedEntity hasOwnerUserOneRelatedEntity = new RelatedEntity(hasOwner, userOneUrnString); protected static RelatedEntity hasOwnerUserTwoRelatedEntity = @@ -207,10 +284,10 @@ public void disableAssert() { @Test public void testStaticUrns() { - assertNotNull(datasetOneUrn); - assertNotNull(datasetTwoUrn); - assertNotNull(datasetThreeUrn); - assertNotNull(datasetFourUrn); + assertNotNull(dataset1Urn); + assertNotNull(dataset2Urn); + assertNotNull(dataset3Urn); + assertNotNull(dataset4Urn); assertNotNull(userOneUrn); assertNotNull(userTwoUrn); @@ -260,13 +337,13 @@ protected GraphService getPopulatedGraphService() throws Exception { List edges = Arrays.asList( - new Edge(datasetTwoUrn, datasetOneUrn, downstreamOf, null, null, null, null, null), - new Edge(datasetThreeUrn, datasetTwoUrn, downstreamOf, null, null, null, null, null), - new Edge(datasetFourUrn, datasetTwoUrn, downstreamOf, null, null, null, null, null), - new Edge(datasetOneUrn, userOneUrn, hasOwner, null, null, null, null, null), - new Edge(datasetTwoUrn, userOneUrn, hasOwner, null, null, null, null, null), - new Edge(datasetThreeUrn, userTwoUrn, hasOwner, null, null, null, null, null), - new Edge(datasetFourUrn, userTwoUrn, hasOwner, null, null, null, null, null), + new Edge(dataset2Urn, dataset1Urn, downstreamOf, null, null, null, null, null), + new Edge(dataset3Urn, dataset2Urn, downstreamOf, null, null, null, null, null), + new Edge(dataset4Urn, dataset2Urn, downstreamOf, null, null, null, null, null), + new Edge(dataset1Urn, userOneUrn, hasOwner, null, null, null, null, null), + new Edge(dataset2Urn, userOneUrn, hasOwner, null, null, null, null, null), + new Edge(dataset3Urn, userTwoUrn, hasOwner, null, null, null, null, null), + new 
Edge(dataset4Urn, userTwoUrn, hasOwner, null, null, null, null, null), new Edge(userOneUrn, userTwoUrn, knowsUser, null, null, null, null, null), new Edge(userTwoUrn, userOneUrn, knowsUser, null, null, null, null, null), new Edge( @@ -308,21 +385,21 @@ protected GraphService getLineagePopulatedGraphService(boolean multiPathSearch) List edges = Arrays.asList( - new Edge(datasetTwoUrn, datasetOneUrn, downstreamOf, null, null, null, null, null), - new Edge(datasetThreeUrn, datasetTwoUrn, downstreamOf, null, null, null, null, null), - new Edge(datasetFourUrn, datasetTwoUrn, downstreamOf, null, null, null, null, null), - new Edge(datasetOneUrn, userOneUrn, hasOwner, null, null, null, null, null), - new Edge(datasetTwoUrn, userOneUrn, hasOwner, null, null, null, null, null), - new Edge(datasetThreeUrn, userTwoUrn, hasOwner, null, null, null, null, null), - new Edge(datasetFourUrn, userTwoUrn, hasOwner, null, null, null, null, null), + new Edge(dataset2Urn, dataset1Urn, downstreamOf, null, null, null, null, null), + new Edge(dataset3Urn, dataset2Urn, downstreamOf, null, null, null, null, null), + new Edge(dataset4Urn, dataset2Urn, downstreamOf, null, null, null, null, null), + new Edge(dataset1Urn, userOneUrn, hasOwner, null, null, null, null, null), + new Edge(dataset2Urn, userOneUrn, hasOwner, null, null, null, null, null), + new Edge(dataset3Urn, userTwoUrn, hasOwner, null, null, null, null, null), + new Edge(dataset4Urn, userTwoUrn, hasOwner, null, null, null, null, null), new Edge(userOneUrn, userTwoUrn, knowsUser, null, null, null, null, null), new Edge(userTwoUrn, userOneUrn, knowsUser, null, null, null, null, null), - new Edge(dataJobOneUrn, datasetOneUrn, consumes, null, null, null, null, null), - new Edge(dataJobOneUrn, datasetTwoUrn, consumes, null, null, null, null, null), - new Edge(dataJobOneUrn, datasetThreeUrn, produces, null, null, null, null, null), - new Edge(dataJobOneUrn, datasetFourUrn, produces, null, null, null, null, null), - new 
Edge(dataJobTwoUrn, datasetOneUrn, consumes, null, null, null, null, null), - new Edge(dataJobTwoUrn, datasetTwoUrn, consumes, null, null, null, null, null), + new Edge(dataJobOneUrn, dataset1Urn, consumes, null, null, null, null, null), + new Edge(dataJobOneUrn, dataset2Urn, consumes, null, null, null, null, null), + new Edge(dataJobOneUrn, dataset3Urn, produces, null, null, null, null, null), + new Edge(dataJobOneUrn, dataset4Urn, produces, null, null, null, null, null), + new Edge(dataJobTwoUrn, dataset1Urn, consumes, null, null, null, null, null), + new Edge(dataJobTwoUrn, dataset2Urn, consumes, null, null, null, null, null), new Edge(dataJobTwoUrn, dataJobOneUrn, downstreamOf, null, null, null, null, null)); edges.forEach(service::addEdge); @@ -371,22 +448,22 @@ public Object[][] getAddEdgeTests() { new Object[] {Collections.emptyList(), Collections.emptyList(), Collections.emptyList()}, new Object[] { Collections.singletonList( - new Edge(datasetOneUrn, datasetTwoUrn, downstreamOf, null, null, null, null, null)), + new Edge(dataset1Urn, dataset2Urn, downstreamOf, null, null, null, null, null)), Collections.singletonList(downstreamOfDatasetTwoRelatedEntity), Collections.singletonList(downstreamOfDatasetOneRelatedEntity) }, new Object[] { Arrays.asList( - new Edge(datasetOneUrn, datasetTwoUrn, downstreamOf, null, null, null, null, null), - new Edge(datasetTwoUrn, datasetThreeUrn, downstreamOf, null, null, null, null, null)), + new Edge(dataset1Urn, dataset2Urn, downstreamOf, null, null, null, null, null), + new Edge(dataset2Urn, dataset3Urn, downstreamOf, null, null, null, null, null)), Arrays.asList(downstreamOfDatasetTwoRelatedEntity, downstreamOfDatasetThreeRelatedEntity), Arrays.asList(downstreamOfDatasetOneRelatedEntity, downstreamOfDatasetTwoRelatedEntity) }, new Object[] { Arrays.asList( - new Edge(datasetOneUrn, datasetTwoUrn, downstreamOf, null, null, null, null, null), - new Edge(datasetOneUrn, userOneUrn, hasOwner, null, null, null, null, null), - 
new Edge(datasetTwoUrn, userTwoUrn, hasOwner, null, null, null, null, null), + new Edge(dataset1Urn, dataset2Urn, downstreamOf, null, null, null, null, null), + new Edge(dataset1Urn, userOneUrn, hasOwner, null, null, null, null, null), + new Edge(dataset2Urn, userTwoUrn, hasOwner, null, null, null, null, null), new Edge(userOneUrn, userTwoUrn, knowsUser, null, null, null, null, null)), Arrays.asList( downstreamOfDatasetTwoRelatedEntity, @@ -531,37 +608,36 @@ public void testPopulatedGraphServiceGetLineage() throws Exception { GraphService service = getLineagePopulatedGraphService(); EntityLineageResult upstreamLineage = - service.getLineage(datasetOneUrn, LineageDirection.UPSTREAM, 0, 1000, 1); + service.getLineage(dataset1Urn, LineageDirection.UPSTREAM, 0, 1000, 1); assertEquals(upstreamLineage.getTotal().intValue(), 0); assertEquals(upstreamLineage.getRelationships().size(), 0); EntityLineageResult downstreamLineage = - service.getLineage(datasetOneUrn, LineageDirection.DOWNSTREAM, 0, 1000, 1); + service.getLineage(dataset1Urn, LineageDirection.DOWNSTREAM, 0, 1000, 1); assertEquals(downstreamLineage.getTotal().intValue(), 3); assertEquals(downstreamLineage.getRelationships().size(), 3); Map relationships = downstreamLineage.getRelationships().stream() .collect(Collectors.toMap(LineageRelationship::getEntity, Function.identity())); - assertTrue(relationships.containsKey(datasetTwoUrn)); - assertEquals(relationships.get(datasetTwoUrn).getType(), downstreamOf); + assertTrue(relationships.containsKey(dataset2Urn)); + assertEquals(relationships.get(dataset2Urn).getType(), downstreamOf); assertTrue(relationships.containsKey(dataJobOneUrn)); assertEquals(relationships.get(dataJobOneUrn).getType(), consumes); assertTrue(relationships.containsKey(dataJobTwoUrn)); assertEquals(relationships.get(dataJobTwoUrn).getType(), consumes); - upstreamLineage = service.getLineage(datasetThreeUrn, LineageDirection.UPSTREAM, 0, 1000, 1); + upstreamLineage = 
service.getLineage(dataset3Urn, LineageDirection.UPSTREAM, 0, 1000, 1); assertEquals(upstreamLineage.getTotal().intValue(), 2); assertEquals(upstreamLineage.getRelationships().size(), 2); relationships = upstreamLineage.getRelationships().stream() .collect(Collectors.toMap(LineageRelationship::getEntity, Function.identity())); - assertTrue(relationships.containsKey(datasetTwoUrn)); - assertEquals(relationships.get(datasetTwoUrn).getType(), downstreamOf); + assertTrue(relationships.containsKey(dataset2Urn)); + assertEquals(relationships.get(dataset2Urn).getType(), downstreamOf); assertTrue(relationships.containsKey(dataJobOneUrn)); assertEquals(relationships.get(dataJobOneUrn).getType(), produces); - downstreamLineage = - service.getLineage(datasetThreeUrn, LineageDirection.DOWNSTREAM, 0, 1000, 1); + downstreamLineage = service.getLineage(dataset3Urn, LineageDirection.DOWNSTREAM, 0, 1000, 1); assertEquals(downstreamLineage.getTotal().intValue(), 0); assertEquals(downstreamLineage.getRelationships().size(), 0); @@ -571,10 +647,10 @@ public void testPopulatedGraphServiceGetLineage() throws Exception { relationships = upstreamLineage.getRelationships().stream() .collect(Collectors.toMap(LineageRelationship::getEntity, Function.identity())); - assertTrue(relationships.containsKey(datasetOneUrn)); - assertEquals(relationships.get(datasetOneUrn).getType(), consumes); - assertTrue(relationships.containsKey(datasetTwoUrn)); - assertEquals(relationships.get(datasetTwoUrn).getType(), consumes); + assertTrue(relationships.containsKey(dataset1Urn)); + assertEquals(relationships.get(dataset1Urn).getType(), consumes); + assertTrue(relationships.containsKey(dataset2Urn)); + assertEquals(relationships.get(dataset2Urn).getType(), consumes); downstreamLineage = service.getLineage(dataJobOneUrn, LineageDirection.DOWNSTREAM, 0, 1000, 1); assertEquals(downstreamLineage.getTotal().intValue(), 3); @@ -582,10 +658,10 @@ public void testPopulatedGraphServiceGetLineage() throws Exception { 
relationships = downstreamLineage.getRelationships().stream() .collect(Collectors.toMap(LineageRelationship::getEntity, Function.identity())); - assertTrue(relationships.containsKey(datasetThreeUrn)); - assertEquals(relationships.get(datasetThreeUrn).getType(), produces); - assertTrue(relationships.containsKey(datasetFourUrn)); - assertEquals(relationships.get(datasetFourUrn).getType(), produces); + assertTrue(relationships.containsKey(dataset3Urn)); + assertEquals(relationships.get(dataset3Urn).getType(), produces); + assertTrue(relationships.containsKey(dataset4Urn)); + assertEquals(relationships.get(dataset4Urn).getType(), produces); assertTrue(relationships.containsKey(dataJobTwoUrn)); assertEquals(relationships.get(dataJobTwoUrn).getType(), downstreamOf); } @@ -594,19 +670,19 @@ public void testPopulatedGraphServiceGetLineage() throws Exception { public Object[][] getFindRelatedEntitiesSourceEntityFilterTests() { return new Object[][] { new Object[] { - newFilter("urn", datasetTwoUrnString), + newFilter("urn", dataset2UrnString), Collections.singletonList(downstreamOf), outgoingRelationships, Collections.singletonList(downstreamOfDatasetOneRelatedEntity) }, new Object[] { - newFilter("urn", datasetTwoUrnString), + newFilter("urn", dataset2UrnString), Collections.singletonList(downstreamOf), incomingRelationships, Arrays.asList(downstreamOfDatasetThreeRelatedEntity, downstreamOfDatasetFourRelatedEntity) }, new Object[] { - newFilter("urn", datasetTwoUrnString), + newFilter("urn", dataset2UrnString), Collections.singletonList(downstreamOf), undirectedRelationships, Arrays.asList( @@ -615,19 +691,19 @@ public Object[][] getFindRelatedEntitiesSourceEntityFilterTests() { downstreamOfDatasetFourRelatedEntity) }, new Object[] { - newFilter("urn", datasetTwoUrnString), + newFilter("urn", dataset2UrnString), Collections.singletonList(hasOwner), outgoingRelationships, Collections.singletonList(hasOwnerUserOneRelatedEntity) }, new Object[] { - newFilter("urn", 
datasetTwoUrnString), + newFilter("urn", dataset2UrnString), Collections.singletonList(hasOwner), incomingRelationships, Collections.emptyList() }, new Object[] { - newFilter("urn", datasetTwoUrnString), + newFilter("urn", dataset2UrnString), Collections.singletonList(hasOwner), undirectedRelationships, Collections.singletonList(hasOwnerUserOneRelatedEntity) @@ -672,19 +748,19 @@ public void testFindRelatedEntitiesSourceEntityFilter( public Object[][] getFindRelatedEntitiesDestinationEntityFilterTests() { return new Object[][] { new Object[] { - newFilter("urn", datasetTwoUrnString), + newFilter("urn", dataset2UrnString), Collections.singletonList(downstreamOf), outgoingRelationships, Collections.singletonList(downstreamOfDatasetTwoRelatedEntity) }, new Object[] { - newFilter("urn", datasetTwoUrnString), + newFilter("urn", dataset2UrnString), Collections.singletonList(downstreamOf), incomingRelationships, Collections.singletonList(downstreamOfDatasetTwoRelatedEntity) }, new Object[] { - newFilter("urn", datasetTwoUrnString), + newFilter("urn", dataset2UrnString), Collections.singletonList(downstreamOf), undirectedRelationships, Collections.singletonList(downstreamOfDatasetTwoRelatedEntity) @@ -1083,8 +1159,7 @@ public void testFindRelatedEntitiesNullSourceType() throws Exception { doTestFindRelatedEntitiesEntityType( anyType, null, downstreamOf, outgoingRelationships, service); - service.addEdge( - new Edge(datasetTwoUrn, datasetOneUrn, downstreamOf, null, null, null, null, null)); + service.addEdge(new Edge(dataset2Urn, dataset1Urn, downstreamOf, null, null, null, null, null)); syncAfterWrite(); doTestFindRelatedEntitiesEntityType( anyType, ImmutableList.of("null"), downstreamOf, outgoingRelationships, service); @@ -1096,7 +1171,7 @@ public void testFindRelatedEntitiesNullSourceType() throws Exception { service, downstreamOfDatasetOneRelatedEntity); - service.addEdge(new Edge(datasetOneUrn, nullUrn, downstreamOf, null, null, null, null, null)); + 
service.addEdge(new Edge(dataset1Urn, nullUrn, downstreamOf, null, null, null, null, null)); syncAfterWrite(); doTestFindRelatedEntitiesEntityType( anyType, @@ -1128,8 +1203,7 @@ public void testFindRelatedEntitiesNullDestinationType() throws Exception { doTestFindRelatedEntitiesEntityType( anyType, null, downstreamOf, outgoingRelationships, service); - service.addEdge( - new Edge(datasetTwoUrn, datasetOneUrn, downstreamOf, null, null, null, null, null)); + service.addEdge(new Edge(dataset2Urn, dataset1Urn, downstreamOf, null, null, null, null, null)); syncAfterWrite(); doTestFindRelatedEntitiesEntityType( anyType, ImmutableList.of("null"), downstreamOf, outgoingRelationships, service); @@ -1141,7 +1215,7 @@ public void testFindRelatedEntitiesNullDestinationType() throws Exception { service, downstreamOfDatasetOneRelatedEntity); - service.addEdge(new Edge(datasetOneUrn, nullUrn, downstreamOf, null, null, null, null, null)); + service.addEdge(new Edge(dataset1Urn, nullUrn, downstreamOf, null, null, null, null, null)); syncAfterWrite(); doTestFindRelatedEntitiesEntityType( anyType, @@ -1281,7 +1355,7 @@ public void testFindRelatedEntitiesAllFilters() throws Exception { RelatedEntitiesResult relatedEntities = service.findRelatedEntities( ImmutableList.of(datasetType), - newFilter("urn", datasetOneUrnString), + newFilter("urn", dataset1UrnString), ImmutableList.of(userType), newFilter("urn", userOneUrnString), Collections.singletonList(hasOwner), @@ -1294,7 +1368,7 @@ public void testFindRelatedEntitiesAllFilters() throws Exception { relatedEntities = service.findRelatedEntities( ImmutableList.of(datasetType), - newFilter("urn", datasetOneUrnString), + newFilter("urn", dataset1UrnString), ImmutableList.of(userType), newFilter("urn", userTwoUrnString), Collections.singletonList(hasOwner), @@ -1312,7 +1386,7 @@ public void testFindRelatedEntitiesMultipleEntityTypes() throws Exception { RelatedEntitiesResult relatedEntities = service.findRelatedEntities( 
ImmutableList.of(datasetType, userType), - newFilter("urn", datasetOneUrnString), + newFilter("urn", dataset1UrnString), ImmutableList.of(datasetType, userType), newFilter("urn", userOneUrnString), Collections.singletonList(hasOwner), @@ -1325,7 +1399,7 @@ public void testFindRelatedEntitiesMultipleEntityTypes() throws Exception { relatedEntities = service.findRelatedEntities( ImmutableList.of(datasetType, userType), - newFilter("urn", datasetOneUrnString), + newFilter("urn", dataset1UrnString), ImmutableList.of(datasetType, userType), newFilter("urn", userTwoUrnString), Collections.singletonList(hasOwner), @@ -1374,7 +1448,7 @@ public void testFindRelatedEntitiesOffsetAndCount() throws Exception { public Object[][] getRemoveEdgesFromNodeTests() { return new Object[][] { new Object[] { - datasetTwoUrn, + dataset2Urn, Collections.singletonList(downstreamOf), outgoingRelationships, Collections.singletonList(downstreamOfDatasetOneRelatedEntity), @@ -1383,7 +1457,7 @@ public Object[][] getRemoveEdgesFromNodeTests() { Arrays.asList(downstreamOfDatasetThreeRelatedEntity, downstreamOfDatasetFourRelatedEntity) }, new Object[] { - datasetTwoUrn, + dataset2Urn, Collections.singletonList(downstreamOf), incomingRelationships, Collections.singletonList(downstreamOfDatasetOneRelatedEntity), @@ -1392,7 +1466,7 @@ public Object[][] getRemoveEdgesFromNodeTests() { Collections.emptyList(), }, new Object[] { - datasetTwoUrn, + dataset2Urn, Collections.singletonList(downstreamOf), undirectedRelationships, Collections.singletonList(downstreamOfDatasetOneRelatedEntity), @@ -1567,7 +1641,7 @@ public void testRemoveEdgesFromNode( @Test public void testRemoveEdgesFromNodeNoRelationshipTypes() throws Exception { GraphService service = getPopulatedGraphService(); - Urn nodeToRemoveFrom = datasetOneUrn; + Urn nodeToRemoveFrom = dataset1Urn; // populated graph asserted in testPopulatedGraphService RelatedEntitiesResult relatedOutgoingEntitiesBeforeRemove = @@ -1662,7 +1736,7 @@ public void 
testRemoveEdgesFromUnknownNode() throws Exception { public void testRemoveNode() throws Exception { GraphService service = getPopulatedGraphService(); - service.removeNode(datasetTwoUrn); + service.removeNode(dataset2Urn); syncAfterWrite(); // assert the modified graph @@ -1961,8 +2035,19 @@ private void doTestConcurrentOp(Stream operations) throws Exception { }) .collect(Collectors.toList()); try { - executorPool.invokeAll( - callables, getTestConcurrentOpTimeout().toMillis(), TimeUnit.MILLISECONDS); + List> futures = + executorPool.invokeAll( + callables, getTestConcurrentOpTimeout().toMillis(), TimeUnit.MILLISECONDS); + futures.forEach( + future -> { + try { + future.get(); + } catch (InterruptedException | ExecutionException e) { + System.err.println( + System.currentTimeMillis() + + ": unable to complete execution of concurrent operations in time"); + } + }); } catch (InterruptedException e) { System.err.println( System.currentTimeMillis() @@ -1976,9 +2061,13 @@ private void doTestConcurrentOp(Stream operations) throws Exception { assertTrue(throwables.isEmpty()); } - @ParameterizedTest - @ValueSource(booleans = {true, false}) - public void testPopulatedGraphServiceGetLineageMultihop(boolean attemptMultiPathAlgo) + @DataProvider(name = "trueFalse") + public static Object[] trueFalse() { + return new Object[] {true, false}; + } + + @Test(dataProvider = "trueFalse") + public void testPopulatedGraphServiceGetLineageMultihop(Boolean attemptMultiPathAlgo) throws Exception { GraphService service = getLineagePopulatedGraphService(attemptMultiPathAlgo); @@ -1988,12 +2077,12 @@ public void testPopulatedGraphServiceGetLineageMultihop(boolean attemptMultiPath (!((service instanceof Neo4jGraphService) || (service instanceof DgraphGraphService))); EntityLineageResult upstreamLineage = - service.getLineage(datasetOneUrn, LineageDirection.UPSTREAM, 0, 1000, 2); + service.getLineage(dataset1Urn, LineageDirection.UPSTREAM, 0, 1000, 2); 
assertEquals(upstreamLineage.getTotal().intValue(), 0); assertEquals(upstreamLineage.getRelationships().size(), 0); EntityLineageResult downstreamLineage = - service.getLineage(datasetOneUrn, LineageDirection.DOWNSTREAM, 0, 1000, 2); + service.getLineage(dataset1Urn, LineageDirection.DOWNSTREAM, 0, 1000, 2); assertEquals(downstreamLineage.getTotal().intValue(), 5); assertEquals(downstreamLineage.getRelationships().size(), 5); @@ -2002,12 +2091,12 @@ public void testPopulatedGraphServiceGetLineageMultihop(boolean attemptMultiPath .collect(Collectors.toMap(LineageRelationship::getEntity, Function.identity())); Set entities = relationships.keySet().stream().collect(Collectors.toUnmodifiableSet()); assertEquals(entities.size(), 5); - assertTrue(relationships.containsKey(datasetTwoUrn)); + assertTrue(relationships.containsKey(dataset2Urn)); assertEquals(relationships.get(dataJobTwoUrn).getDegree(), 1); - assertTrue(relationships.containsKey(datasetThreeUrn)); - assertEquals(relationships.get(datasetThreeUrn).getDegree(), 2); - assertTrue(relationships.containsKey(datasetFourUrn)); - assertEquals(relationships.get(datasetFourUrn).getDegree(), 2); + assertTrue(relationships.containsKey(dataset3Urn)); + assertEquals(relationships.get(dataset3Urn).getDegree(), 2); + assertTrue(relationships.containsKey(dataset4Urn)); + assertEquals(relationships.get(dataset4Urn).getDegree(), 2); assertTrue(relationships.containsKey(dataJobOneUrn)); assertEquals(relationships.get(dataJobOneUrn).getDegree(), 1); // dataJobOne is present both at degree 1 and degree 2 @@ -2018,21 +2107,20 @@ public void testPopulatedGraphServiceGetLineageMultihop(boolean attemptMultiPath assertTrue(relationships.containsKey(dataJobTwoUrn)); assertEquals(relationships.get(dataJobTwoUrn).getDegree(), 1); - upstreamLineage = service.getLineage(datasetThreeUrn, LineageDirection.UPSTREAM, 0, 1000, 2); + upstreamLineage = service.getLineage(dataset3Urn, LineageDirection.UPSTREAM, 0, 1000, 2); 
assertEquals(upstreamLineage.getTotal().intValue(), 3); assertEquals(upstreamLineage.getRelationships().size(), 3); relationships = upstreamLineage.getRelationships().stream() .collect(Collectors.toMap(LineageRelationship::getEntity, Function.identity())); - assertTrue(relationships.containsKey(datasetOneUrn)); - assertEquals(relationships.get(datasetOneUrn).getDegree(), 2); - assertTrue(relationships.containsKey(datasetTwoUrn)); - assertEquals(relationships.get(datasetTwoUrn).getDegree(), 1); + assertTrue(relationships.containsKey(dataset1Urn)); + assertEquals(relationships.get(dataset1Urn).getDegree(), 2); + assertTrue(relationships.containsKey(dataset2Urn)); + assertEquals(relationships.get(dataset2Urn).getDegree(), 1); assertTrue(relationships.containsKey(dataJobOneUrn)); assertEquals(relationships.get(dataJobOneUrn).getDegree(), 1); - downstreamLineage = - service.getLineage(datasetThreeUrn, LineageDirection.DOWNSTREAM, 0, 1000, 2); + downstreamLineage = service.getLineage(dataset3Urn, LineageDirection.DOWNSTREAM, 0, 1000, 2); assertEquals(downstreamLineage.getTotal().intValue(), 0); assertEquals(downstreamLineage.getRelationships().size(), 0); } @@ -2040,34 +2128,37 @@ public void testPopulatedGraphServiceGetLineageMultihop(boolean attemptMultiPath @Test public void testHighlyConnectedGraphWalk() throws Exception { final GraphService service = getGraphService(); - - int nodes = 25; - List allRelationships = Arrays.asList(downstreamOf, consumes, hasOwner); - List edges = - getFullyConnectedGraph(nodes, allRelationships, Collections.singletonList(datasetType)); + List allRelationships = Collections.singletonList(downstreamOf); + List edges = createHighlyConnectedGraph(); Stream operations = edges.stream().map(edge -> () -> service.addEdge(edge)); doTestConcurrentOp(operations); syncAfterWrite(); - RelatedEntitiesResult relatedEntities = - service.findRelatedEntities( - null, - EMPTY_FILTER, - null, - EMPTY_FILTER, - allRelationships, - outgoingRelationships, - 
0, - nodes * 3 * 2); - Set expectedRelatedEntities = edges.stream() .map( edge -> new RelatedEntity(edge.getRelationshipType(), edge.getDestination().toString())) .collect(Collectors.toSet()); + RelatedEntitiesResult relatedEntities = null; + for (int i = 0; i < 3; i++) { + relatedEntities = + service.findRelatedEntities( + null, + EMPTY_FILTER, + null, + EMPTY_FILTER, + allRelationships, + outgoingRelationships, + 0, + 400); + if (!new HashSet<>(relatedEntities.getEntities()).equals(expectedRelatedEntities)) { + // Sleep up to 6 seconds in case Elastic needs to catch up + Thread.sleep(2000); + } + } assertEquals(new HashSet<>(relatedEntities.getEntities()), expectedRelatedEntities); Urn root = UrnUtils.getUrn(relatedEntities.getEntities().get(0).getUrn()); @@ -2087,7 +2178,7 @@ public void testHighlyConnectedGraphWalk() throws Exception { 1000, 100, new LineageFlags().setEntitiesExploredPerHopLimit(5)); - assertEquals(lineageResult.getRelationships().size(), 24); + assertEquals(lineageResult.getRelationships().size(), 19); LineageRelationshipArray relationships = lineageResult.getRelationships(); int maxDegree = relationships.stream() @@ -2112,15 +2203,31 @@ public void testHighlyConnectedGraphWalk() throws Exception { 100, new LineageFlags().setEntitiesExploredPerHopLimit(5)); - assertEquals(lineageResultMulti.getRelationships().size(), 25); + assertEquals(lineageResultMulti.getRelationships().size(), 20); relationships = lineageResultMulti.getRelationships(); maxDegree = relationships.stream() .flatMap(relationship -> relationship.getDegrees().stream()) .reduce(0, Math::max); - assertTrue(maxDegree > 6); + assertTrue(maxDegree > 4); // Reset graph service getGraphService(); } + + protected List createHighlyConnectedGraph() { + List graph = new ArrayList<>(); + for (Urn sourceUrn : datasetUrns) { + for (Urn destUrn : datasetUrns) { + if (sourceUrn.equals(destUrn)) { + continue; + } + Edge edge = + new Edge( + sourceUrn, destUrn, downstreamOf, 0L, userOneUrn, 0L, 
userOneUrn, null, null, null); + graph.add(edge); + } + } + return graph; + } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBaseNoVia.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBaseNoVia.java index 19ca2e85e8c54..e4cefaa1feaa1 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBaseNoVia.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBaseNoVia.java @@ -360,7 +360,7 @@ public void testPopulatedGraphService() throws Exception { public void testRemoveNode() throws Exception { GraphService service = getPopulatedGraphService(); - service.removeNode(datasetTwoUrn); + service.removeNode(dataset2Urn); syncAfterWrite(); // assert the modified graph diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/dgraph/DgraphGraphServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/dgraph/DgraphGraphServiceTest.java index 4f8fa54b028ff..680776a8e777c 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/dgraph/DgraphGraphServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/dgraph/DgraphGraphServiceTest.java @@ -822,7 +822,7 @@ public void testGetDestinationUrnsFromResponseData() { } @Override - public void testPopulatedGraphServiceGetLineageMultihop(boolean attemptMultiHop) { + public void testPopulatedGraphServiceGetLineageMultihop(Boolean attemptMultiHop) { // TODO: Remove this overridden method once the multihop for dGraph is implemented! 
} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphServiceTest.java index a58fafabdac91..22d4ed56b5a93 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphServiceTest.java @@ -232,65 +232,65 @@ public void testGetLineage() { List edges = Arrays.asList( // d1 <-Consumes- dj1 -Produces-> d2 <-DownstreamOf- d3 <-DownstreamOf- d5 - new Edge(dataJobOneUrn, datasetOneUrn, consumes, 1L, null, 3L, null, null), - new Edge(dataJobOneUrn, datasetTwoUrn, produces, 5L, null, 7L, null, null), - new Edge(datasetThreeUrn, datasetTwoUrn, downstreamOf, 9L, null, null, null, null), - new Edge(datasetFiveUrn, datasetThreeUrn, downstreamOf, 11L, null, null, null, null), + new Edge(dataJobOneUrn, dataset1Urn, consumes, 1L, null, 3L, null, null), + new Edge(dataJobOneUrn, dataset2Urn, produces, 5L, null, 7L, null, null), + new Edge(dataset3Urn, dataset2Urn, downstreamOf, 9L, null, null, null, null), + new Edge(dataset5Urn, dataset3Urn, downstreamOf, 11L, null, null, null, null), // another path between d2 and d5 which is shorter // d1 <-DownstreamOf- d4 <-DownstreamOf- d5 - new Edge(datasetFourUrn, datasetOneUrn, downstreamOf, 13L, null, 13L, null, null), - new Edge(datasetFiveUrn, datasetFourUrn, downstreamOf, 13L, null, 13L, null, null)); + new Edge(dataset4Urn, dataset1Urn, downstreamOf, 13L, null, 13L, null, null), + new Edge(dataset5Urn, dataset4Urn, downstreamOf, 13L, null, 13L, null, null)); edges.forEach(service::addEdge); // simple path finding final var upstreamLineageDataset3Hop3 = - service.getLineage(datasetThreeUrn, LineageDirection.UPSTREAM, 0, 1000, 3); + service.getLineage(dataset3Urn, LineageDirection.UPSTREAM, 0, 1000, 3); assertEquals(upstreamLineageDataset3Hop3.getTotal().intValue(), 3); assertEquals( 
getPathUrnArraysFromLineageResult(upstreamLineageDataset3Hop3), Set.of( - new UrnArray(datasetThreeUrn, datasetTwoUrn), - new UrnArray(datasetThreeUrn, datasetTwoUrn, dataJobOneUrn), - new UrnArray(datasetThreeUrn, datasetTwoUrn, dataJobOneUrn, datasetOneUrn))); + new UrnArray(dataset3Urn, dataset2Urn), + new UrnArray(dataset3Urn, dataset2Urn, dataJobOneUrn), + new UrnArray(dataset3Urn, dataset2Urn, dataJobOneUrn, dataset1Urn))); // simple path finding final var upstreamLineageDatasetFiveHop2 = - service.getLineage(datasetFiveUrn, LineageDirection.UPSTREAM, 0, 1000, 2); + service.getLineage(dataset5Urn, LineageDirection.UPSTREAM, 0, 1000, 2); assertEquals(upstreamLineageDatasetFiveHop2.getTotal().intValue(), 4); assertEquals( getPathUrnArraysFromLineageResult(upstreamLineageDatasetFiveHop2), Set.of( - new UrnArray(datasetFiveUrn, datasetThreeUrn), - new UrnArray(datasetFiveUrn, datasetThreeUrn, datasetTwoUrn), - new UrnArray(datasetFiveUrn, datasetFourUrn), - new UrnArray(datasetFiveUrn, datasetFourUrn, datasetOneUrn))); + new UrnArray(dataset5Urn, dataset3Urn), + new UrnArray(dataset5Urn, dataset3Urn, dataset2Urn), + new UrnArray(dataset5Urn, dataset4Urn), + new UrnArray(dataset5Urn, dataset4Urn, dataset1Urn))); // there are two paths from p5 to p1, one longer and one shorter, and the longer one is // discarded from result final var upstreamLineageDataset5Hop5 = - service.getLineage(datasetFiveUrn, LineageDirection.UPSTREAM, 0, 1000, 5); + service.getLineage(dataset5Urn, LineageDirection.UPSTREAM, 0, 1000, 5); assertEquals(upstreamLineageDataset5Hop5.getTotal().intValue(), 5); assertEquals( getPathUrnArraysFromLineageResult(upstreamLineageDataset5Hop5), Set.of( - new UrnArray(datasetFiveUrn, datasetThreeUrn), - new UrnArray(datasetFiveUrn, datasetThreeUrn, datasetTwoUrn), - new UrnArray(datasetFiveUrn, datasetThreeUrn, datasetTwoUrn, dataJobOneUrn), - new UrnArray(datasetFiveUrn, datasetFourUrn), - new UrnArray(datasetFiveUrn, datasetFourUrn, datasetOneUrn))); + 
new UrnArray(dataset5Urn, dataset3Urn), + new UrnArray(dataset5Urn, dataset3Urn, dataset2Urn), + new UrnArray(dataset5Urn, dataset3Urn, dataset2Urn, dataJobOneUrn), + new UrnArray(dataset5Urn, dataset4Urn), + new UrnArray(dataset5Urn, dataset4Urn, dataset1Urn))); // downstream lookup final var downstreamLineageDataset1Hop2 = - service.getLineage(datasetOneUrn, LineageDirection.DOWNSTREAM, 0, 1000, 2); + service.getLineage(dataset1Urn, LineageDirection.DOWNSTREAM, 0, 1000, 2); assertEquals(downstreamLineageDataset1Hop2.getTotal().intValue(), 4); assertEquals( getPathUrnArraysFromLineageResult(downstreamLineageDataset1Hop2), Set.of( - new UrnArray(datasetOneUrn, dataJobOneUrn), - new UrnArray(datasetOneUrn, dataJobOneUrn, datasetTwoUrn), - new UrnArray(datasetOneUrn, datasetFourUrn), - new UrnArray(datasetOneUrn, datasetFourUrn, datasetFiveUrn))); + new UrnArray(dataset1Urn, dataJobOneUrn), + new UrnArray(dataset1Urn, dataJobOneUrn, dataset2Urn), + new UrnArray(dataset1Urn, dataset4Urn), + new UrnArray(dataset1Urn, dataset4Urn, dataset5Urn))); } @Test @@ -300,27 +300,27 @@ public void testGetLineageTimeFilterQuery() throws Exception { List edges = Arrays.asList( // d1 <-Consumes- dj1 -Produces-> d2 <-DownstreamOf- d3 <-DownstreamOf- d4 - new Edge(dataJobOneUrn, datasetOneUrn, consumes, 1L, null, 3L, null, null), - new Edge(dataJobOneUrn, datasetTwoUrn, produces, 5L, null, 7L, null, null), - new Edge(datasetThreeUrn, datasetTwoUrn, downstreamOf, 9L, null, null, null, null), - new Edge(datasetFourUrn, datasetThreeUrn, downstreamOf, 11L, null, null, null, null)); + new Edge(dataJobOneUrn, dataset1Urn, consumes, 1L, null, 3L, null, null), + new Edge(dataJobOneUrn, dataset2Urn, produces, 5L, null, 7L, null, null), + new Edge(dataset3Urn, dataset2Urn, downstreamOf, 9L, null, null, null, null), + new Edge(dataset4Urn, dataset3Urn, downstreamOf, 11L, null, null, null, null)); edges.forEach(service::addEdge); // no time filtering EntityLineageResult upstreamLineageTwoHops = - 
service.getLineage(datasetFourUrn, LineageDirection.UPSTREAM, 0, 1000, 2); + service.getLineage(dataset4Urn, LineageDirection.UPSTREAM, 0, 1000, 2); assertEquals(upstreamLineageTwoHops.getTotal().intValue(), 2); assertEquals(upstreamLineageTwoHops.getRelationships().size(), 2); assertEquals( getPathUrnArraysFromLineageResult(upstreamLineageTwoHops), Set.of( - new UrnArray(datasetFourUrn, datasetThreeUrn), - new UrnArray(datasetFourUrn, datasetThreeUrn, datasetTwoUrn))); + new UrnArray(dataset4Urn, dataset3Urn), + new UrnArray(dataset4Urn, dataset3Urn, dataset2Urn))); // with time filtering EntityLineageResult upstreamLineageTwoHopsWithTimeFilter = service.getLineage( - datasetFourUrn, + dataset4Urn, LineageDirection.UPSTREAM, 0, 1000, @@ -330,12 +330,12 @@ public void testGetLineageTimeFilterQuery() throws Exception { assertEquals(upstreamLineageTwoHopsWithTimeFilter.getRelationships().size(), 1); assertEquals( getPathUrnArraysFromLineageResult(upstreamLineageTwoHopsWithTimeFilter), - Set.of(new UrnArray(datasetFourUrn, datasetThreeUrn))); + Set.of(new UrnArray(dataset4Urn, dataset3Urn))); // with time filtering EntityLineageResult upstreamLineageTimeFilter = service.getLineage( - datasetTwoUrn, + dataset2Urn, LineageDirection.UPSTREAM, 0, 1000, @@ -346,13 +346,13 @@ public void testGetLineageTimeFilterQuery() throws Exception { assertEquals( getPathUrnArraysFromLineageResult(upstreamLineageTimeFilter), Set.of( - new UrnArray(datasetTwoUrn, dataJobOneUrn), - new UrnArray(datasetTwoUrn, dataJobOneUrn, datasetOneUrn))); + new UrnArray(dataset2Urn, dataJobOneUrn), + new UrnArray(dataset2Urn, dataJobOneUrn, dataset1Urn))); // with time filtering EntityLineageResult downstreamLineageTimeFilter = service.getLineage( - datasetOneUrn, + dataset1Urn, LineageDirection.DOWNSTREAM, 0, 1000, @@ -362,7 +362,7 @@ public void testGetLineageTimeFilterQuery() throws Exception { assertEquals(downstreamLineageTimeFilter.getRelationships().size(), 1); assertEquals( 
getPathUrnArraysFromLineageResult(downstreamLineageTimeFilter), - Set.of(new UrnArray(datasetOneUrn, dataJobOneUrn))); + Set.of(new UrnArray(dataset1Urn, dataJobOneUrn))); } @Test @@ -372,28 +372,28 @@ public void testGetLineageTimeFilteringSkipsShorterButNonMatchingPaths() { List edges = Arrays.asList( // d1 <-Consumes- dj1 -Produces-> d2 <-DownstreamOf- d3 - new Edge(dataJobOneUrn, datasetOneUrn, consumes, 5L, null, 5L, null, null), - new Edge(dataJobOneUrn, datasetTwoUrn, produces, 7L, null, 7L, null, null), - new Edge(datasetThreeUrn, datasetTwoUrn, downstreamOf, 9L, null, null, null, null), + new Edge(dataJobOneUrn, dataset1Urn, consumes, 5L, null, 5L, null, null), + new Edge(dataJobOneUrn, dataset2Urn, produces, 7L, null, 7L, null, null), + new Edge(dataset3Urn, dataset2Urn, downstreamOf, 9L, null, null, null, null), // d1 <-DownstreamOf- d3 (shorter path from d3 to d1, but with very old time) - new Edge(datasetThreeUrn, datasetOneUrn, downstreamOf, 1L, null, 2L, null, null)); + new Edge(dataset3Urn, dataset1Urn, downstreamOf, 1L, null, 2L, null, null)); edges.forEach(service::addEdge); // no time filtering, shorter path from d3 to d1 is returned EntityLineageResult upstreamLineageNoTimeFiltering = - service.getLineage(datasetThreeUrn, LineageDirection.UPSTREAM, 0, 1000, 3); + service.getLineage(dataset3Urn, LineageDirection.UPSTREAM, 0, 1000, 3); assertEquals( getPathUrnArraysFromLineageResult(upstreamLineageNoTimeFiltering), Set.of( - new UrnArray(datasetThreeUrn, datasetTwoUrn), - new UrnArray(datasetThreeUrn, datasetTwoUrn, dataJobOneUrn), - new UrnArray(datasetThreeUrn, datasetOneUrn))); + new UrnArray(dataset3Urn, dataset2Urn), + new UrnArray(dataset3Urn, dataset2Urn, dataJobOneUrn), + new UrnArray(dataset3Urn, dataset1Urn))); // with time filtering, shorter path from d3 to d1 is excluded so longer path is returned EntityLineageResult upstreamLineageTimeFiltering = service.getLineage( - datasetThreeUrn, + dataset3Urn, LineageDirection.UPSTREAM, 0, 1000, 
@@ -402,9 +402,9 @@ public void testGetLineageTimeFilteringSkipsShorterButNonMatchingPaths() { assertEquals( getPathUrnArraysFromLineageResult(upstreamLineageTimeFiltering), Set.of( - new UrnArray(datasetThreeUrn, datasetTwoUrn), - new UrnArray(datasetThreeUrn, datasetTwoUrn, dataJobOneUrn), - new UrnArray(datasetThreeUrn, datasetTwoUrn, dataJobOneUrn, datasetOneUrn))); + new UrnArray(dataset3Urn, dataset2Urn), + new UrnArray(dataset3Urn, dataset2Urn, dataJobOneUrn), + new UrnArray(dataset3Urn, dataset2Urn, dataJobOneUrn, dataset1Urn))); } @Override diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java index 8d08c1362a340..b389f8228a98d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java @@ -336,60 +336,39 @@ public void testTimestampLineage() throws Exception { Arrays.asList( // One upstream edge new Edge( - datasetTwoUrn, - datasetOneUrn, - downstreamOf, - initialTime, - null, - initialTime, - null, - null), + dataset2Urn, dataset1Urn, downstreamOf, initialTime, null, initialTime, null, null), // Two downstream new Edge( - datasetThreeUrn, - datasetTwoUrn, - downstreamOf, - initialTime, - null, - initialTime, - null, - null), + dataset3Urn, dataset2Urn, downstreamOf, initialTime, null, initialTime, null, null), new Edge( - datasetFourUrn, - datasetTwoUrn, - downstreamOf, - initialTime, - null, - initialTime, - null, - null), + dataset4Urn, dataset2Urn, downstreamOf, initialTime, null, initialTime, null, null), // One with null values, should always be returned - new Edge(datasetFiveUrn, datasetTwoUrn, downstreamOf, null, null, null, null, null)); + new Edge(dataset5Urn, dataset2Urn, downstreamOf, null, null, null, null, null)); 
edges.forEach(getGraphService()::addEdge); syncAfterWrite(); // Without timestamps - EntityLineageResult upstreamResult = getUpstreamLineage(datasetTwoUrn, null, null); - EntityLineageResult downstreamResult = getDownstreamLineage(datasetTwoUrn, null, null); + EntityLineageResult upstreamResult = getUpstreamLineage(dataset2Urn, null, null); + EntityLineageResult downstreamResult = getDownstreamLineage(dataset2Urn, null, null); Assert.assertEquals(Integer.valueOf(1), upstreamResult.getTotal()); Assert.assertEquals(Integer.valueOf(3), downstreamResult.getTotal()); // Timestamp before - upstreamResult = getUpstreamLineage(datasetTwoUrn, 0L, initialTime - 10); - downstreamResult = getDownstreamLineage(datasetTwoUrn, 0L, initialTime - 10); + upstreamResult = getUpstreamLineage(dataset2Urn, 0L, initialTime - 10); + downstreamResult = getDownstreamLineage(dataset2Urn, 0L, initialTime - 10); Assert.assertEquals(Integer.valueOf(0), upstreamResult.getTotal()); Assert.assertEquals(Integer.valueOf(1), downstreamResult.getTotal()); // Timestamp after - upstreamResult = getUpstreamLineage(datasetTwoUrn, initialTime + 10, initialTime + 100); - downstreamResult = getDownstreamLineage(datasetTwoUrn, initialTime + 10, initialTime + 100); + upstreamResult = getUpstreamLineage(dataset2Urn, initialTime + 10, initialTime + 100); + downstreamResult = getDownstreamLineage(dataset2Urn, initialTime + 10, initialTime + 100); Assert.assertEquals(Integer.valueOf(0), upstreamResult.getTotal()); Assert.assertEquals(Integer.valueOf(1), downstreamResult.getTotal()); // Timestamp included - upstreamResult = getUpstreamLineage(datasetTwoUrn, initialTime - 10, initialTime + 10); - downstreamResult = getDownstreamLineage(datasetTwoUrn, initialTime - 10, initialTime + 10); + upstreamResult = getUpstreamLineage(dataset2Urn, initialTime - 10, initialTime + 10); + downstreamResult = getDownstreamLineage(dataset2Urn, initialTime - 10, initialTime + 10); Assert.assertEquals(Integer.valueOf(1), 
upstreamResult.getTotal()); Assert.assertEquals(Integer.valueOf(3), downstreamResult.getTotal()); @@ -398,17 +377,10 @@ public void testTimestampLineage() throws Exception { edges = Arrays.asList( new Edge( - datasetTwoUrn, - datasetOneUrn, - downstreamOf, - initialTime, - null, - updatedTime, - null, - null), + dataset2Urn, dataset1Urn, downstreamOf, initialTime, null, updatedTime, null, null), new Edge( - datasetThreeUrn, - datasetTwoUrn, + dataset3Urn, + dataset2Urn, downstreamOf, initialTime, null, @@ -420,20 +392,20 @@ public void testTimestampLineage() throws Exception { syncAfterWrite(); // Without timestamps - upstreamResult = getUpstreamLineage(datasetTwoUrn, null, null); - downstreamResult = getDownstreamLineage(datasetTwoUrn, null, null); + upstreamResult = getUpstreamLineage(dataset2Urn, null, null); + downstreamResult = getDownstreamLineage(dataset2Urn, null, null); Assert.assertEquals(Integer.valueOf(1), upstreamResult.getTotal()); Assert.assertEquals(Integer.valueOf(3), downstreamResult.getTotal()); // Window includes initial time and updated time - upstreamResult = getUpstreamLineage(datasetTwoUrn, initialTime - 10, updatedTime + 10); - downstreamResult = getDownstreamLineage(datasetTwoUrn, initialTime - 10, updatedTime + 10); + upstreamResult = getUpstreamLineage(dataset2Urn, initialTime - 10, updatedTime + 10); + downstreamResult = getDownstreamLineage(dataset2Urn, initialTime - 10, updatedTime + 10); Assert.assertEquals(Integer.valueOf(1), upstreamResult.getTotal()); Assert.assertEquals(Integer.valueOf(3), downstreamResult.getTotal()); // Window includes updated time but not initial time - upstreamResult = getUpstreamLineage(datasetTwoUrn, initialTime + 10, updatedTime + 10); - downstreamResult = getDownstreamLineage(datasetTwoUrn, initialTime + 10, updatedTime + 10); + upstreamResult = getUpstreamLineage(dataset2Urn, initialTime + 10, updatedTime + 10); + downstreamResult = getDownstreamLineage(dataset2Urn, initialTime + 10, updatedTime + 10); 
Assert.assertEquals(Integer.valueOf(1), upstreamResult.getTotal()); Assert.assertEquals(Integer.valueOf(2), downstreamResult.getTotal()); } @@ -447,7 +419,16 @@ public void testTimestampLineage() throws Exception { * @return The Upstream lineage for urn from the window from startTime to endTime */ private EntityLineageResult getUpstreamLineage(Urn urn, Long startTime, Long endTime) { - return getLineage(urn, LineageDirection.UPSTREAM, startTime, endTime, null); + return getLineage(urn, LineageDirection.UPSTREAM, startTime, endTime, 0, null); + } + + private EntityLineageResult getUpstreamLineage(Urn urn, Long startTime, Long endTime, int count) { + return getLineage(urn, LineageDirection.UPSTREAM, startTime, endTime, count, null); + } + + private EntityLineageResult getUpstreamLineage( + Urn urn, Long startTime, Long endTime, int count, int exploreLimit) { + return getLineage(urn, LineageDirection.UPSTREAM, startTime, endTime, count, exploreLimit); } /** @@ -459,7 +440,7 @@ private EntityLineageResult getUpstreamLineage(Urn urn, Long startTime, Long end * @return The Downstream lineage for urn from the window from startTime to endTime */ private EntityLineageResult getDownstreamLineage(Urn urn, Long startTime, Long endTime) { - return getLineage(urn, LineageDirection.DOWNSTREAM, startTime, endTime, null); + return getLineage(urn, LineageDirection.DOWNSTREAM, startTime, endTime, 0, null); } /** @@ -476,13 +457,14 @@ private EntityLineageResult getLineage( LineageDirection direction, Long startTime, Long endTime, + int count, @Nullable Integer entitiesExploredPerHopLimit) { return getGraphService() .getLineage( urn, direction, 0, - 0, + count, 3, new LineageFlags() .setStartTimeMillis(startTime, SetMode.REMOVE_IF_NULL) From 3e39129f7b9332c246285b3a5f61531a74d6608b Mon Sep 17 00:00:00 2001 From: Valerii Date: Mon, 1 Apr 2024 21:22:47 +0300 Subject: [PATCH 02/17] fix(ingest/tableau) Fix Tableau lineage ingestion from Clickhouse (#10167) --- 
.../src/datahub/ingestion/source/tableau_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py index e547934bc4a2d..881f6c63e094d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py @@ -573,7 +573,7 @@ def get_fully_qualified_table_name( .replace("`", "") ) - if platform in ("athena", "hive", "mysql"): + if platform in ("athena", "hive", "mysql", "clickhouse"): # it two tier database system (athena, hive, mysql), just take final 2 fully_qualified_table_name = ".".join( fully_qualified_table_name.split(".")[-2:] From 14bbc0b5909f7a205811914bd26226fbeb40e367 Mon Sep 17 00:00:00 2001 From: Christian Groll Date: Mon, 1 Apr 2024 21:57:52 +0200 Subject: [PATCH 03/17] [oracle ingestion]: get database name when using service (#10158) --- .../datahub/ingestion/source/sql/oracle.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py index bcf0f26008ae3..0a67d6228e6db 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py @@ -122,6 +122,17 @@ def __init__(self, inspector_instance: Inspector): # tables that we don't want to ingest into the DataHub self.exclude_tablespaces: Tuple[str, str] = ("SYSTEM", "SYSAUX") + def get_db_name(self) -> str: + try: + # Try to retrieve current DB name by executing query + db_name = self._inspector_instance.bind.execute( + sql.text("select sys_context('USERENV','DB_NAME') from dual") + ).scalar() + return str(db_name) + except sqlalchemy.exc.DatabaseError as e: + logger.error("Error fetching DB name: " + str(e)) + return "" + def get_schema_names(self) -> List[str]: 
cursor = self._inspector_instance.bind.execute( sql.text("SELECT username FROM dba_users ORDER BY username") @@ -582,6 +593,22 @@ def create(cls, config_dict, ctx): config = OracleConfig.parse_obj(config_dict) return cls(config, ctx) + def get_db_name(self, inspector: Inspector) -> str: + """ + This overwrites the default implementation, which only tries to read + database name from Connection URL, which does not work when using + service instead of database. + In that case, it tries to retrieve the database name by sending a query to the DB. + """ + + # call default implementation first + db_name = super().get_db_name(inspector) + + if db_name == "" and isinstance(inspector, OracleInspectorObjectWrapper): + db_name = inspector.get_db_name() + + return db_name + def get_inspectors(self) -> Iterable[Inspector]: for inspector in super().get_inspectors(): event.listen( From ef637ccb37ad139fc1d28144547419879b491d41 Mon Sep 17 00:00:00 2001 From: RyanHolstien Date: Mon, 1 Apr 2024 15:01:09 -0500 Subject: [PATCH 04/17] fix(docker): fix versioning for compose file post release (#10176) --- .../docker-compose-without-neo4j.override.yml | 2 +- docker/docker-compose.override.yml | 2 +- docker/mysql/docker-compose.mysql.yml | 2 +- ...ocker-compose-without-neo4j.quickstart.yml | 2 +- .../quickstart/docker-compose.quickstart.yml | 2 +- .../quickstart_version_mapping.yaml | 6 +++--- .../src/datahub/cli/quickstart_versioning.py | 8 ++++---- .../cli/test_quickstart_version_mapping.py | 20 +++++++++---------- 8 files changed, 22 insertions(+), 22 deletions(-) diff --git a/docker/docker-compose-without-neo4j.override.yml b/docker/docker-compose-without-neo4j.override.yml index 5b31a54e6473f..37ae41e383e7c 100644 --- a/docker/docker-compose-without-neo4j.override.yml +++ b/docker/docker-compose-without-neo4j.override.yml @@ -45,7 +45,7 @@ services: - DATAHUB_PRECREATE_TOPICS=${DATAHUB_PRECREATE_TOPICS:-false} mysql: hostname: mysql - image: mysql:${DATAHUB_MYSQL_VERSION:-5.7} + 
image: mysql:${DATAHUB_MYSQL_VERSION:-8.2} command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin --default-authentication-plugin=mysql_native_password ports: - ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306 diff --git a/docker/docker-compose.override.yml b/docker/docker-compose.override.yml index 1b314a76aa755..d443a3f4629df 100644 --- a/docker/docker-compose.override.yml +++ b/docker/docker-compose.override.yml @@ -30,7 +30,7 @@ services: - DATAHUB_PRECREATE_TOPICS=${DATAHUB_PRECREATE_TOPICS:-false} mysql: hostname: mysql - image: mysql:${DATAHUB_MYSQL_VERSION:-5.7} + image: mysql:${DATAHUB_MYSQL_VERSION:-8.2} command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin --default-authentication-plugin=mysql_native_password ports: - ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306 diff --git a/docker/mysql/docker-compose.mysql.yml b/docker/mysql/docker-compose.mysql.yml index e60058a6b509b..d8c7767985000 100644 --- a/docker/mysql/docker-compose.mysql.yml +++ b/docker/mysql/docker-compose.mysql.yml @@ -4,7 +4,7 @@ version: '3.8' services: mysql: hostname: mysql - image: mysql:${DATAHUB_MYSQL_VERSION:-5.7} + image: mysql:${DATAHUB_MYSQL_VERSION:-8.2} env_file: env/docker.env command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin ports: diff --git a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml index 6f7368a0a0826..176e5539aa491 100644 --- a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml @@ -210,7 +210,7 @@ services: test: mysqladmin ping -h mysql -u $$MYSQL_USER --password=$$MYSQL_PASSWORD timeout: 5s hostname: mysql - image: mysql:${DATAHUB_MYSQL_VERSION:-5.7} + image: mysql:${DATAHUB_MYSQL_VERSION:-8.2} ports: - ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306 restart: on-failure diff --git a/docker/quickstart/docker-compose.quickstart.yml 
b/docker/quickstart/docker-compose.quickstart.yml index 586c0cf23f48c..e39695f52a437 100644 --- a/docker/quickstart/docker-compose.quickstart.yml +++ b/docker/quickstart/docker-compose.quickstart.yml @@ -217,7 +217,7 @@ services: test: mysqladmin ping -h mysql -u $$MYSQL_USER --password=$$MYSQL_PASSWORD timeout: 5s hostname: mysql - image: mysql:${DATAHUB_MYSQL_VERSION:-5.7} + image: mysql:${DATAHUB_MYSQL_VERSION:-8.2} ports: - ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306 restart: on-failure diff --git a/docker/quickstart/quickstart_version_mapping.yaml b/docker/quickstart/quickstart_version_mapping.yaml index b08cfda175aa9..d9b7b4d661f06 100644 --- a/docker/quickstart/quickstart_version_mapping.yaml +++ b/docker/quickstart/quickstart_version_mapping.yaml @@ -23,7 +23,7 @@ quickstart_version_map: default: composefile_git_ref: master docker_tag: head - mysql_tag: "5.7" + mysql_tag: "8.2" # default: # Use this to pin default to a specific version. # composefile_git_ref: fd1bd51541a132017a648f4a2f037eec8f70ba26 # v0.10.0 + quickstart compose file fixes # docker_tag: v0.10.0 @@ -31,12 +31,12 @@ quickstart_version_map: head: composefile_git_ref: master docker_tag: head - mysql_tag: "5.7" + mysql_tag: "8.2" # v0.13.0 we upgraded MySQL image for EOL v0.13.0: composefile_git_ref: master - docker_tag: head + docker_tag: v0.13.0 mysql_tag: "8.2" # v0.9.6 images contain security vulnerabilities diff --git a/metadata-ingestion/src/datahub/cli/quickstart_versioning.py b/metadata-ingestion/src/datahub/cli/quickstart_versioning.py index 1c3ce93c1f788..493869ac77bb8 100644 --- a/metadata-ingestion/src/datahub/cli/quickstart_versioning.py +++ b/metadata-ingestion/src/datahub/cli/quickstart_versioning.py @@ -82,7 +82,7 @@ def fetch_quickstart_config(cls) -> "QuickstartVersionMappingConfig": return QuickstartVersionMappingConfig( quickstart_version_map={ "default": QuickstartExecutionPlan( - composefile_git_ref="master", docker_tag="head", mysql_tag="5.7" + composefile_git_ref="master", 
docker_tag="head", mysql_tag="8.2" ), } ) @@ -94,7 +94,7 @@ def fetch_quickstart_config(cls) -> "QuickstartVersionMappingConfig": try: release = cls._fetch_latest_version() config.quickstart_version_map["stable"] = QuickstartExecutionPlan( - composefile_git_ref=release, docker_tag=release, mysql_tag="5.7" + composefile_git_ref=release, docker_tag=release, mysql_tag="8.2" ) except Exception: click.echo( @@ -116,8 +116,8 @@ def get_quickstart_execution_plan( requested_version = "default" composefile_git_ref = requested_version docker_tag = requested_version - # Default to 5.7 if not specified in version map - mysql_tag = "5.7" + # Default to 8.2 if not specified in version map + mysql_tag = "8.2" result = self.quickstart_version_map.get( requested_version, QuickstartExecutionPlan( diff --git a/metadata-ingestion/tests/unit/cli/test_quickstart_version_mapping.py b/metadata-ingestion/tests/unit/cli/test_quickstart_version_mapping.py index 3b06e48522955..38f3451a191a4 100644 --- a/metadata-ingestion/tests/unit/cli/test_quickstart_version_mapping.py +++ b/metadata-ingestion/tests/unit/cli/test_quickstart_version_mapping.py @@ -9,7 +9,7 @@ "default": { "composefile_git_ref": "master", "docker_tag": "latest", - "mysql_tag": "5.7", + "mysql_tag": "8.2", }, "v0.9.6": { "composefile_git_ref": "v0.9.6.1", @@ -19,17 +19,17 @@ "v2.0.0": { "composefile_git_ref": "v2.0.1", "docker_tag": "v2.0.0", - "mysql_tag": "5.7", + "mysql_tag": "8.2", }, "v1.0.0": { "composefile_git_ref": "v1.0.0", "docker_tag": "v1.0.0", - "mysql_tag": "5.7", + "mysql_tag": "8.2", }, "stable": { "composefile_git_ref": "v1.0.1", "docker_tag": "latest", - "mysql_tag": "5.7", + "mysql_tag": "8.2", }, }, } @@ -41,7 +41,7 @@ def test_quickstart_version_config(): expected = QuickstartExecutionPlan( docker_tag="v1.0.0", composefile_git_ref="v1.0.0", - mysql_tag="5.7", + mysql_tag="8.2", ) assert execution_plan == expected @@ -51,7 +51,7 @@ def test_quickstart_version_config_default(): expected = 
QuickstartExecutionPlan( docker_tag="v2.0.0", composefile_git_ref="v2.0.1", - mysql_tag="5.7", + mysql_tag="8.2", ) assert execution_plan == expected @@ -59,7 +59,7 @@ def test_quickstart_version_config_default(): def test_quickstart_version_config_stable(): execution_plan = example_version_mapper.get_quickstart_execution_plan("stable") expected = QuickstartExecutionPlan( - docker_tag="latest", composefile_git_ref="v1.0.1", mysql_tag="5.7" + docker_tag="latest", composefile_git_ref="v1.0.1", mysql_tag="8.2" ) assert execution_plan == expected @@ -68,13 +68,13 @@ def test_quickstart_forced_stable(): example_version_mapper.quickstart_version_map["default"] = QuickstartExecutionPlan( composefile_git_ref="v1.0.1", docker_tag="latest", - mysql_tag="5.7", + mysql_tag="8.2", ) execution_plan = example_version_mapper.get_quickstart_execution_plan(None) expected = QuickstartExecutionPlan( docker_tag="latest", composefile_git_ref="v1.0.1", - mysql_tag="5.7", + mysql_tag="8.2", ) assert execution_plan == expected @@ -92,7 +92,7 @@ def test_quickstart_forced_not_a_version_tag(): expected = QuickstartExecutionPlan( docker_tag="NOT A VERSION", composefile_git_ref="NOT A VERSION", - mysql_tag="5.7", + mysql_tag="8.2", ) assert execution_plan == expected From 9a0a53bbe44d0ca5ea420c1654f87c4f7c636c67 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Mon, 1 Apr 2024 15:54:37 -0500 Subject: [PATCH 05/17] fix(restoreIndices): batchSize vs limit (#10178) --- .../upgrade/restoreindices/SendMAEStep.java | 5 +- .../ReindexDataJobViaNodesCLLStep.java | 21 +++--- .../DatahubUpgradeNonBlockingTest.java | 64 +++++++++++++++++++ .../linkedin/metadata/entity/AspectDao.java | 3 +- .../metadata/entity/EntityServiceImpl.java | 48 +++++++------- .../entity/cassandra/CassandraAspectDao.java | 3 +- .../metadata/entity/ebean/EbeanAspectDao.java | 43 ++++++++++--- .../metadata/entity/EntityServiceTest.java | 14 ++-- 
.../kafka/MceConsumerApplicationTest.java | 3 +- .../elastic/OperationsController.java | 27 +++++--- .../com.linkedin.entity.aspects.restspec.json | 12 ++++ ...nkedin.operations.operations.restspec.json | 12 ++++ .../com.linkedin.entity.aspects.snapshot.json | 12 ++++ ...nkedin.operations.operations.snapshot.json | 12 ++++ .../resources/entity/AspectResource.java | 7 +- .../operations/OperationsResource.java | 10 +-- .../metadata/resources/operations/Utils.java | 24 +++++-- .../metadata/entity/EntityService.java | 3 +- .../restoreindices/RestoreIndicesArgs.java | 50 +++++++++------ 19 files changed, 275 insertions(+), 98 deletions(-) create mode 100644 datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java index aca27892d2e3a..83bc96ad449d1 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java @@ -48,7 +48,7 @@ public KafkaJob(UpgradeContext context, RestoreIndicesArgs args) { @Override public RestoreIndicesResult call() { - return _entityService.restoreIndices(args, context.report()::addLine); + return _entityService.streamRestoreIndices(args, context.report()::addLine).findFirst().get(); } } @@ -85,7 +85,10 @@ private List iterateFutures(List entityService; @@ -33,13 +32,17 @@ public Function executable() { return (context) -> { RestoreIndicesArgs args = new RestoreIndicesArgs() - .setAspectName(DATA_JOB_INPUT_OUTPUT_ASPECT_NAME) - .setUrnLike("urn:li:" + DATA_JOB_ENTITY_NAME + ":%") - .setBatchSize(batchSize); - RestoreIndicesResult result = - entityService.restoreIndices(args, x -> context.report().addLine((String) x)); - context.report().addLine("Rows migrated: " + 
result.rowsMigrated); - context.report().addLine("Rows ignored: " + result.ignored); + .aspectName(DATA_JOB_INPUT_OUTPUT_ASPECT_NAME) + .urnLike("urn:li:" + DATA_JOB_ENTITY_NAME + ":%") + .batchSize(batchSize); + + entityService + .streamRestoreIndices(args, x -> context.report().addLine((String) x)) + .forEach( + result -> { + context.report().addLine("Rows migrated: " + result.rowsMigrated); + context.report().addLine("Rows ignored: " + result.ignored); + }); BootstrapStep.setUpgradeResult(UPGRADE_ID_URN, entityService); context.report().addLine("State updated: " + UPGRADE_ID_URN); diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java new file mode 100644 index 0000000000000..e1257df9ad748 --- /dev/null +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java @@ -0,0 +1,64 @@ +package com.linkedin.datahub.upgrade; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.testng.AssertJUnit.assertNotNull; + +import com.linkedin.datahub.upgrade.impl.DefaultUpgradeManager; +import com.linkedin.datahub.upgrade.system.SystemUpdateNonBlocking; +import com.linkedin.datahub.upgrade.system.vianodes.ReindexDataJobViaNodesCLL; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; +import java.util.List; +import javax.inject.Named; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; +import org.testng.annotations.Test; + 
+@ActiveProfiles("test") +@SpringBootTest( + classes = {UpgradeCliApplication.class, UpgradeCliApplicationTestConfiguration.class}, + properties = { + "BOOTSTRAP_SYSTEM_UPDATE_DATA_JOB_NODE_CLL_ENABLED=true", + "kafka.schemaRegistry.type=INTERNAL", + "DATAHUB_UPGRADE_HISTORY_TOPIC_NAME=test_due_topic", + "METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=test_mcl_versioned_topic" + }, + args = {"-u", "SystemUpdateNonBlocking"}) +public class DatahubUpgradeNonBlockingTest extends AbstractTestNGSpringContextTests { + + @Autowired(required = false) + @Named("systemUpdateNonBlocking") + private SystemUpdateNonBlocking systemUpdateNonBlocking; + + @Autowired + @Test + public void testSystemUpdateNonBlockingInit() { + assertNotNull(systemUpdateNonBlocking); + } + + @Test + public void testReindexDataJobViaNodesCLLPaging() { + EntityService mockService = mock(EntityService.class); + ReindexDataJobViaNodesCLL cllUpgrade = new ReindexDataJobViaNodesCLL(mockService, true, 10); + SystemUpdateNonBlocking upgrade = + new SystemUpdateNonBlocking(List.of(), List.of(cllUpgrade), null); + DefaultUpgradeManager manager = new DefaultUpgradeManager(); + manager.register(upgrade); + manager.execute("SystemUpdateNonBlocking", List.of()); + verify(mockService, times(1)) + .streamRestoreIndices( + eq( + new RestoreIndicesArgs() + .batchSize(10) + .limit(0) + .aspectName("dataJobInputOutput") + .urnLike("urn:li:dataJob:%")), + any()); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java index b031377842176..e836b69ef4305 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java @@ -5,7 +5,6 @@ import com.linkedin.metadata.entity.ebean.EbeanAspectV2; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; import com.linkedin.metadata.utils.metrics.MetricUtils; -import 
io.ebean.PagedList; import io.ebean.Transaction; import java.sql.Timestamp; import java.util.List; @@ -106,7 +105,7 @@ ListResult listUrns( Integer countAspect(@Nonnull final String aspectName, @Nullable String urnLike); @Nonnull - PagedList getPagedAspects(final RestoreIndicesArgs args); + Stream> streamAspectBatches(final RestoreIndicesArgs args); @Nonnull Stream streamAspects(String entityName, String aspectName); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index 7f11170d12e72..754c5f272e275 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -49,7 +49,6 @@ import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; import com.linkedin.metadata.aspect.utils.DefaultAspectsUtil; import com.linkedin.metadata.config.PreProcessHooks; -import com.linkedin.metadata.entity.ebean.EbeanAspectV2; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.entity.ebean.batch.DeleteItemImpl; @@ -76,7 +75,6 @@ import com.linkedin.mxe.SystemMetadata; import com.linkedin.r2.RemoteInvocationException; import com.linkedin.util.Pair; -import io.ebean.PagedList; import io.ebean.Transaction; import io.opentelemetry.extension.annotations.WithSpan; import java.net.URISyntaxException; @@ -1177,38 +1175,38 @@ public Integer getCountAspect(@Nonnull String aspectName, @Nullable String urnLi @Nonnull @Override - public RestoreIndicesResult restoreIndices( + public Stream streamRestoreIndices( @Nonnull RestoreIndicesArgs args, @Nonnull Consumer logger) { logger.accept(String.format("Args are %s", args)); logger.accept( String.format( - "Reading rows %s through %s from the aspects table started.", - args.start, 
args.start + args.batchSize)); - long startTime = System.currentTimeMillis(); - PagedList rows = aspectDao.getPagedAspects(args); - long timeSqlQueryMs = System.currentTimeMillis() - startTime; + "Reading rows %s through %s (0 == infinite) in batches of %s from the aspects table started.", + args.start, args.limit, args.batchSize)); - logger.accept( - String.format( - "Reading rows %s through %s from the aspects table completed.", - args.start, args.start + args.batchSize)); + long startTime = System.currentTimeMillis(); + return aspectDao + .streamAspectBatches(args) + .map( + batchStream -> { + long timeSqlQueryMs = System.currentTimeMillis() - startTime; - List systemAspects = - EntityUtils.toSystemAspectFromEbeanAspects( - rows != null ? rows.getList() : List.of(), this); + List systemAspects = + EntityUtils.toSystemAspectFromEbeanAspects( + batchStream.collect(Collectors.toList()), this); - RestoreIndicesResult result = restoreIndices(systemAspects, logger); + RestoreIndicesResult result = restoreIndices(systemAspects, logger); + result.timeSqlQueryMs = timeSqlQueryMs; - try { - TimeUnit.MILLISECONDS.sleep(args.batchDelayMs); - } catch (InterruptedException e) { - throw new RuntimeException( - "Thread interrupted while sleeping after successful batch migration."); - } - - result.timeSqlQueryMs = timeSqlQueryMs; - return result; + logger.accept("Batch completed."); + try { + TimeUnit.MILLISECONDS.sleep(args.batchDelayMs); + } catch (InterruptedException e) { + throw new RuntimeException( + "Thread interrupted while sleeping after successful batch migration."); + } + return result; + }); } @Nonnull diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java index c1e76e7c67836..71b9b9ad86f72 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java +++ 
b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java @@ -34,7 +34,6 @@ import com.linkedin.metadata.query.ExtraInfo; import com.linkedin.metadata.query.ExtraInfoArray; import com.linkedin.metadata.query.ListResultMetadata; -import io.ebean.PagedList; import io.ebean.Transaction; import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; @@ -492,7 +491,7 @@ public Integer countAspect(@Nonnull String aspectName, @Nullable String urnLike) } @Nonnull - public PagedList getPagedAspects(final RestoreIndicesArgs args) { + public Stream> streamAspectBatches(final RestoreIndicesArgs args) { // Not implemented return null; } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java index 23d443c10b71f..161218b6707dc 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java @@ -8,6 +8,7 @@ import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; +import com.google.common.collect.Iterators; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.metadata.aspect.AspectRetriever; @@ -43,10 +44,12 @@ import java.net.URISyntaxException; import java.sql.Timestamp; import java.time.Clock; +import java.time.Instant; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -58,6 +61,7 @@ import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; +import java.util.stream.StreamSupport; import javax.annotation.Nonnull; import javax.annotation.Nullable; import 
javax.persistence.PersistenceException; @@ -495,7 +499,7 @@ public Integer countAspect(@Nonnull String aspectName, @Nullable String urnLike) @Nonnull @Override - public PagedList getPagedAspects(final RestoreIndicesArgs args) { + public Stream> streamAspectBatches(final RestoreIndicesArgs args) { ExpressionList exp = _server .find(EbeanAspectV2.class) @@ -511,6 +515,15 @@ public PagedList getPagedAspects(final RestoreIndicesArgs args) { if (args.urnLike != null) { exp = exp.like(EbeanAspectV2.URN_COLUMN, args.urnLike); } + if (args.gePitEpochMs > 0) { + exp = + exp.ge( + EbeanAspectV2.CREATED_ON_COLUMN, + Timestamp.from(Instant.ofEpochMilli(args.gePitEpochMs))) + .le( + EbeanAspectV2.CREATED_ON_COLUMN, + Timestamp.from(Instant.ofEpochMilli(args.lePitEpochMs))); + } int start = args.start; if (args.urnBasedPagination) { @@ -531,13 +544,27 @@ public PagedList getPagedAspects(final RestoreIndicesArgs args) { } } - return exp.orderBy() - .asc(EbeanAspectV2.URN_COLUMN) - .orderBy() - .asc(EbeanAspectV2.ASPECT_COLUMN) - .setFirstRow(start) - .setMaxRows(args.batchSize) - .findPagedList(); + if (args.limit > 0) { + exp = exp.setMaxRows(args.limit); + } + + return partition( + exp.orderBy() + .asc(EbeanAspectV2.URN_COLUMN) + .orderBy() + .asc(EbeanAspectV2.ASPECT_COLUMN) + .setFirstRow(start) + .findStream(), + args.batchSize); + } + + private static Stream> partition(Stream source, int size) { + final Iterator it = source.iterator(); + final Iterator> partIt = + Iterators.transform(Iterators.partition(it, size), List::stream); + final Iterable> iterable = () -> partIt; + + return StreamSupport.stream(iterable.spliterator(), false); } @Override diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java index 25f9e4b28a32a..24707a4a6f32b 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java +++ 
b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java @@ -1578,13 +1578,13 @@ public void testRestoreIndices() throws Exception { clearInvocations(_mockProducer); RestoreIndicesArgs args = new RestoreIndicesArgs(); - args.setAspectName(UPSTREAM_LINEAGE_ASPECT_NAME); - args.setBatchSize(1); - args.setStart(0); - args.setBatchDelayMs(1L); - args.setNumThreads(1); - args.setUrn(urnStr); - _entityServiceImpl.restoreIndices(args, obj -> {}); + args.aspectName(UPSTREAM_LINEAGE_ASPECT_NAME); + args.batchSize(1); + args.start(0); + args.batchDelayMs(1L); + args.numThreads(1); + args.urn(urnStr); + _entityServiceImpl.streamRestoreIndices(args, obj -> {}).collect(Collectors.toList()); ArgumentCaptor mclCaptor = ArgumentCaptor.forClass(MetadataChangeLog.class); diff --git a/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTest.java b/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTest.java index bce8664689e2c..84a4f4e839a08 100644 --- a/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTest.java +++ b/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTest.java @@ -7,6 +7,7 @@ import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesResult; import io.datahubproject.metadata.jobs.common.health.kafka.KafkaHealthIndicator; +import java.util.stream.Stream; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; import org.springframework.boot.test.web.client.TestRestTemplate; @@ -30,7 +31,7 @@ public class MceConsumerApplicationTest extends AbstractTestNGSpringContextTests public void testRestliServletConfig() { RestoreIndicesResult mockResult = new RestoreIndicesResult(); mockResult.setRowsMigrated(100); - 
when(_mockEntityService.restoreIndices(any(), any())).thenReturn(mockResult); + when(_mockEntityService.streamRestoreIndices(any(), any())).thenReturn(Stream.of(mockResult)); String response = this.restTemplate.postForObject( diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java index f7c6b4ec071c4..e371dfaf1d8fa 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java @@ -250,13 +250,17 @@ public ResponseEntity explainSearchQuery( @Tag(name = "RestoreIndices") @GetMapping(path = "/restoreIndices", produces = MediaType.APPLICATION_JSON_VALUE) @Operation(summary = "Restore ElasticSearch indices from primary storage based on URNs.") - public ResponseEntity restoreIndices( + public ResponseEntity> restoreIndices( @RequestParam(required = false, name = "aspectName") @Nullable String aspectName, @RequestParam(required = false, name = "urn") @Nullable String urn, @RequestParam(required = false, name = "urnLike") @Nullable String urnLike, - @RequestParam(required = false, name = "batchSize", defaultValue = "100") @Nullable + @RequestParam(required = false, name = "batchSize", defaultValue = "500") @Nullable Integer batchSize, - @RequestParam(required = false, name = "start", defaultValue = "0") @Nullable Integer start) { + @RequestParam(required = false, name = "start", defaultValue = "0") @Nullable Integer start, + @RequestParam(required = false, name = "limit", defaultValue = "0") @Nullable Integer limit, + @RequestParam(required = false, name = "gePitEpochMs", defaultValue = "0") @Nullable + Long gePitEpochMs, + @RequestParam(required = false, name = "lePitEpochMs") @Nullable Long 
lePitEpochMs) { Authentication authentication = AuthenticationContext.getAuthentication(); if (!AuthUtil.isAPIAuthorized( @@ -266,16 +270,21 @@ public ResponseEntity restoreIndices( RestoreIndicesArgs args = new RestoreIndicesArgs() - .setAspectName(aspectName) - .setUrnLike(urnLike) - .setUrn( + .aspectName(aspectName) + .urnLike(urnLike) + .urn( Optional.ofNullable(urn) .map(urnStr -> UrnUtils.getUrn(urnStr).toString()) .orElse(null)) - .setStart(start) - .setBatchSize(batchSize); + .start(start) + .batchSize(batchSize) + .limit(limit) + .gePitEpochMs(gePitEpochMs) + .lePitEpochMs(lePitEpochMs); - return ResponseEntity.of(Optional.of(entityService.restoreIndices(args, log::info))); + return ResponseEntity.of( + Optional.of( + entityService.streamRestoreIndices(args, log::info).collect(Collectors.toList()))); } @Tag(name = "RestoreIndices") diff --git a/metadata-service/restli-api/src/main/idl/com.linkedin.entity.aspects.restspec.json b/metadata-service/restli-api/src/main/idl/com.linkedin.entity.aspects.restspec.json index 917540aca8728..32e7a0e58e535 100644 --- a/metadata-service/restli-api/src/main/idl/com.linkedin.entity.aspects.restspec.json +++ b/metadata-service/restli-api/src/main/idl/com.linkedin.entity.aspects.restspec.json @@ -110,6 +110,18 @@ "name" : "batchSize", "type" : "int", "optional" : true + }, { + "name" : "limit", + "type" : "int", + "optional" : true + }, { + "name" : "gePitEpochMs", + "type" : "long", + "optional" : true + }, { + "name" : "lePitEpochMs", + "type" : "long", + "optional" : true } ], "returns" : "string" } ], diff --git a/metadata-service/restli-api/src/main/idl/com.linkedin.operations.operations.restspec.json b/metadata-service/restli-api/src/main/idl/com.linkedin.operations.operations.restspec.json index 0fb6a18a7974b..ce5b2b27904ec 100644 --- a/metadata-service/restli-api/src/main/idl/com.linkedin.operations.operations.restspec.json +++ 
b/metadata-service/restli-api/src/main/idl/com.linkedin.operations.operations.restspec.json @@ -55,6 +55,18 @@ "name" : "batchSize", "type" : "int", "optional" : true + }, { + "name" : "limit", + "type" : "int", + "optional" : true + }, { + "name" : "gePitEpochMs", + "type" : "long", + "optional" : true + }, { + "name" : "lePitEpochMs", + "type" : "long", + "optional" : true } ], "returns" : "string" }, { diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json index 2a96e9963bf01..becdcdd0215fd 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json @@ -4136,6 +4136,18 @@ "name" : "batchSize", "type" : "int", "optional" : true + }, { + "name" : "limit", + "type" : "int", + "optional" : true + }, { + "name" : "gePitEpochMs", + "type" : "long", + "optional" : true + }, { + "name" : "lePitEpochMs", + "type" : "long", + "optional" : true } ], "returns" : "string" } ], diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json index d7199bed56d2c..0573a342da420 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json @@ -3782,6 +3782,18 @@ "name" : "batchSize", "type" : "int", "optional" : true + }, { + "name" : "limit", + "type" : "int", + "optional" : true + }, { + "name" : "gePitEpochMs", + "type" : "long", + "optional" : true + }, { + "name" : "lePitEpochMs", + "type" : "long", + "optional" : true } ], "returns" : "string" }, { diff --git 
a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java index 2f1e27dbe2575..21d688c7e6e1b 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java @@ -297,7 +297,10 @@ public Task restoreIndices( @ActionParam(PARAM_URN) @Optional @Nullable String urn, @ActionParam(PARAM_URN_LIKE) @Optional @Nullable String urnLike, @ActionParam("start") @Optional @Nullable Integer start, - @ActionParam("batchSize") @Optional @Nullable Integer batchSize) { + @ActionParam("batchSize") @Optional @Nullable Integer batchSize, + @ActionParam("limit") @Optional @Nullable Integer limit, + @ActionParam("gePitEpochMs") @Optional @Nullable Long gePitEpochMs, + @ActionParam("lePitEpochMs") @Optional @Nullable Long lePitEpochMs) { return RestliUtil.toTask( () -> { if (!isAPIAuthorized( @@ -308,7 +311,7 @@ public Task restoreIndices( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to update entities."); } return Utils.restoreIndices( - aspectName, urn, urnLike, start, batchSize, _authorizer, _entityService); + aspectName, urn, urnLike, start, batchSize, limit, gePitEpochMs, lePitEpochMs, _authorizer, _entityService); }, MetricRegistry.name(this.getClass(), "restoreIndices")); } diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java index 13d88f30dd032..8b87923a6d423 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java +++ 
b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java @@ -91,12 +91,12 @@ public Task restoreIndices( @ActionParam(PARAM_URN) @Optional @Nullable String urn, @ActionParam(PARAM_URN_LIKE) @Optional @Nullable String urnLike, @ActionParam("start") @Optional @Nullable Integer start, - @ActionParam("batchSize") @Optional @Nullable Integer batchSize) { + @ActionParam("batchSize") @Optional @Nullable Integer batchSize, + @ActionParam("limit") @Optional @Nullable Integer limit, + @ActionParam("gePitEpochMs") @Optional @Nullable Long gePitEpochMs, + @ActionParam("lePitEpochMs") @Optional @Nullable Long lePitEpochMs) { return RestliUtil.toTask( - () -> { - return Utils.restoreIndices( - aspectName, urn, urnLike, start, batchSize, _authorizer, _entityService); - }, + () -> Utils.restoreIndices(aspectName, urn, urnLike, start, batchSize, limit, gePitEpochMs, lePitEpochMs, _authorizer, _entityService), MetricRegistry.name(this.getClass(), "restoreIndices")); } diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java index 5f999482cd859..d4f04bf62fbd8 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java @@ -7,14 +7,15 @@ import com.datahub.plugins.auth.authorization.Authorizer; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; -import com.linkedin.metadata.authorization.Disjunctive; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; +import com.linkedin.metadata.entity.restoreindices.RestoreIndicesResult; import 
com.linkedin.restli.common.HttpStatus; import com.linkedin.restli.server.RestLiServiceException; import java.util.HashMap; import java.util.Map; +import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; @@ -31,6 +32,9 @@ public static String restoreIndices( @Nullable String urnLike, @Nullable Integer start, @Nullable Integer batchSize, + @Nullable Integer limit, + @Nullable Long gePitEpochMs, + @Nullable Long lePitEpochMs, @Nonnull Authorizer authorizer, @Nonnull EntityService entityService) { @@ -49,14 +53,20 @@ public static String restoreIndices( } RestoreIndicesArgs args = new RestoreIndicesArgs() - .setAspectName(aspectName) - .setUrnLike(urnLike) - .setUrn(urn) - .setStart(start) - .setBatchSize(batchSize); + .aspectName(aspectName) + .urnLike(urnLike) + .urn(urn) + .start(start) + .batchSize(batchSize) + .limit(limit) + .gePitEpochMs(gePitEpochMs) + .lePitEpochMs(lePitEpochMs); Map result = new HashMap<>(); result.put("args", args); - result.put("result", entityService.restoreIndices(args, log::info)); + result.put("result", entityService + .streamRestoreIndices(args, log::info) + .map(RestoreIndicesResult::toString) + .collect(Collectors.joining("\n"))); return result.toString(); } } diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java index 9c44aefbed19d..33dffb4ed975c 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java @@ -31,6 +31,7 @@ import java.util.Set; import java.util.concurrent.Future; import java.util.function.Consumer; +import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -244,7 +245,7 @@ String batchApplyRetention( Integer 
getCountAspect(@Nonnull String aspectName, @Nullable String urnLike); // TODO: Extract this to a different service, doesn't need to be here - RestoreIndicesResult restoreIndices( + Stream streamRestoreIndices( @Nonnull RestoreIndicesArgs args, @Nonnull Consumer logger); // Restore indices from list using key lookups (no scans) diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesArgs.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesArgs.java index e50b44b7f0eca..b4da40871cdd4 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesArgs.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesArgs.java @@ -1,13 +1,24 @@ package com.linkedin.metadata.entity.restoreindices; +import java.time.Instant; import lombok.Data; +import lombok.experimental.Accessors; @Data +@Accessors(fluent = true) public class RestoreIndicesArgs implements Cloneable { + public static final int DEFAULT_BATCH_SIZE = 500; + public static final int DEFAULT_NUM_THREADS = 1; + public static final int DEFAULT_BATCH_DELAY_MS = 1; + public static final long DEFAULT_GE_PIT_EPOCH_MS = 0; + public int start = 0; - public int batchSize = 10; - public int numThreads = 1; - public long batchDelayMs = 1; + public int batchSize = DEFAULT_BATCH_SIZE; + public int limit = 0; + public int numThreads = DEFAULT_NUM_THREADS; + public long batchDelayMs = DEFAULT_BATCH_DELAY_MS; + public long gePitEpochMs = DEFAULT_GE_PIT_EPOCH_MS; + public long lePitEpochMs; public String aspectName; public String urn; public String urnLike; @@ -26,37 +37,38 @@ public RestoreIndicesArgs clone() { } } - public RestoreIndicesArgs setAspectName(String aspectName) { - this.aspectName = aspectName; + public RestoreIndicesArgs start(Integer start) { + this.start = start != null ? 
start : 0; return this; } - public RestoreIndicesArgs setUrnLike(String urnLike) { - this.urnLike = urnLike; + public RestoreIndicesArgs batchSize(Integer batchSize) { + this.batchSize = batchSize != null ? batchSize : DEFAULT_BATCH_SIZE; return this; } - public RestoreIndicesArgs setUrn(String urn) { - this.urn = urn; + public RestoreIndicesArgs limit(Integer limit) { + this.limit = limit != null ? limit : 0; return this; } - public RestoreIndicesArgs setStart(Integer start) { - if (start != null) { - this.start = start; - } + public RestoreIndicesArgs numThreads(Integer numThreads) { + this.numThreads = numThreads != null ? numThreads : DEFAULT_NUM_THREADS; return this; } - public RestoreIndicesArgs setBatchSize(Integer batchSize) { - if (batchSize != null) { - this.batchSize = batchSize; - } + public RestoreIndicesArgs batchDelayMs(Long batchDelayMs) { + this.batchDelayMs = batchDelayMs != null ? batchDelayMs : DEFAULT_BATCH_DELAY_MS; + return this; + } + + public RestoreIndicesArgs gePitEpochMs(Long gePitEpochMs) { + this.gePitEpochMs = gePitEpochMs != null ? gePitEpochMs : DEFAULT_GE_PIT_EPOCH_MS; return this; } - public RestoreIndicesArgs setUrnBasedPagination(Boolean urnBasedPagination) { - this.urnBasedPagination = urnBasedPagination; + public RestoreIndicesArgs lePitEpochMs(Long lePitEpochMs) { + this.lePitEpochMs = lePitEpochMs != null ? 
lePitEpochMs : Instant.now().toEpochMilli(); return this; } } From bf52807e12f16facff9f88372f1fc598e87675d2 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 2 Apr 2024 07:28:06 -0700 Subject: [PATCH 06/17] feat(ui): show classification in test connection (#10156) --- .../app/ingest/source/builder/RecipeForm/TestConnection/types.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/TestConnection/types.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/TestConnection/types.ts index 3395f0c67d8c8..4f401e34d1a39 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/TestConnection/types.ts +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/TestConnection/types.ts @@ -12,6 +12,7 @@ export enum SourceCapability { TAGS = 'Extract Tags', SCHEMA_METADATA = 'Schema Metadata', CONTAINERS = 'Asset Containers', + CLASSIFICATION = 'Classification', } export interface ConnectionCapability { From db33c8646a74b271de3a5e4eef32ce1541251538 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 2 Apr 2024 07:28:43 -0700 Subject: [PATCH 07/17] fix(ingest): add classification dep for dynamodb (#10162) --- metadata-ingestion/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 7d6ba719eb353..33325b26d4e15 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -314,7 +314,7 @@ "dbt": {"requests"} | sqlglot_lib | aws_common, "dbt-cloud": {"requests"} | sqlglot_lib, "druid": sql_common | {"pydruid>=0.6.2"}, - "dynamodb": aws_common, + "dynamodb": aws_common | classification_lib, # Starting with 7.14.0 python client is checking if it is connected to elasticsearch client. 
If its not it throws # UnsupportedProductError # https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/release-notes.html#rn-7-14-0 From c9b9afc5307e1ae602675d67b37823a931a137f6 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 2 Apr 2024 07:29:27 -0700 Subject: [PATCH 08/17] feat(ingest/dbt): enable model performance and compiled code by default (#10164) --- docs/how/updating-datahub.md | 3 ++- .../src/datahub/ingestion/source/dbt/dbt_common.py | 8 ++------ .../src/datahub/ingestion/source/dbt/dbt_core.py | 6 +++--- metadata-ingestion/tests/integration/dbt/test_dbt.py | 9 +-------- metadata-ingestion/tests/unit/test_dbt_source.py | 2 +- 5 files changed, 9 insertions(+), 19 deletions(-) diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 60504aaa7b80a..8051777a5ee07 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -26,7 +26,8 @@ This file documents any backwards-incompatible changes in DataHub and assists pe - #10055 - Assertion entities generated by dbt are now associated with the dbt dataset entity, and not the entity in the data warehouse. - #10090 - For Redshift ingestion, `use_lineage_v2` is now enabled by default. - #10147 - For looker ingestion, the browse paths for looker Dashboard, Chart, View, Explore have been updated to align with Looker UI. This does not affect URNs or lineage but primarily affects (improves) browsing experience. -- +- #10164 - For dbt ingestion, `entities_enabled.model_performance` and `include_compiled_code` are now both enabled by default. Upgrading dbt ingestion will also require upgrading the backend to 0.13.1. 
+ ### Potential Downtime ### Deprecations diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 788a4f0b5d616..4876e2b6fcff4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -179,9 +179,7 @@ class DBTEntitiesEnabled(ConfigModel): description="Emit metadata for test results when set to Yes or Only", ) model_performance: EmitDirective = Field( - # TODO: This is currently disabled by default, but will be enabled by default once - # the models have stabilized. - EmitDirective.NO, + EmitDirective.YES, description="Emit model performance metadata when set to Yes or Only. " "Only supported with dbt core.", ) @@ -349,9 +347,7 @@ class DBTCommonConfig( _remove_use_compiled_code = pydantic_removed_field("use_compiled_code") include_compiled_code: bool = Field( - # TODO: Once the formattedViewLogic field model change is included in a server - # release, probably 0.13.1, we can flip the default to True. - default=False, + default=True, description="When enabled, includes the compiled code in the emitted metadata.", ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py index d04fa59ecbb6f..c885ee6525b08 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py @@ -53,9 +53,9 @@ class DBTCoreConfig(DBTCommonConfig): run_results_paths: List[str] = Field( default=[], description="Path to output of dbt test run as run_results files in JSON format. " - "If invoking dbt multiple times, you can provide paths to multiple run result files." - "See https://docs.getdbt.com/reference/artifacts/run-results-json. 
" - "If not specified, test execution results will not be populated in DataHub.", + "If not specified, test execution results and model performance metadata will not be populated in DataHub. " + "If invoking dbt multiple times, you can provide paths to multiple run result files. " + "See https://docs.getdbt.com/reference/artifacts/run-results-json.", ) # Because we now also collect model performance metadata, the "test_results" field was renamed to "run_results". diff --git a/metadata-ingestion/tests/integration/dbt/test_dbt.py b/metadata-ingestion/tests/integration/dbt/test_dbt.py index 953ff24f7ba57..5f7d65f5b2377 100644 --- a/metadata-ingestion/tests/integration/dbt/test_dbt.py +++ b/metadata-ingestion/tests/integration/dbt/test_dbt.py @@ -26,8 +26,6 @@ _default_dbt_source_args = { # Needed to avoid needing to access datahub server. "write_semantics": "OVERRIDE", - # Needed until this is made the default. - "include_compiled_code": True, } @@ -216,12 +214,7 @@ def set_paths( manifest_file="sample_dbt_manifest_2.json", sources_file="sample_dbt_sources_2.json", run_results_files=["sample_dbt_run_results_2.json"], - source_config_modifiers={ - "entities_enabled": { - # TODO: Remove this once it becomes the default. 
- "model_performance": "YES", - }, - }, + source_config_modifiers={}, ), ], ids=lambda dbt_test_config: dbt_test_config.run_id, diff --git a/metadata-ingestion/tests/unit/test_dbt_source.py b/metadata-ingestion/tests/unit/test_dbt_source.py index 91a4e568d8200..b0db18594f76d 100644 --- a/metadata-ingestion/tests/unit/test_dbt_source.py +++ b/metadata-ingestion/tests/unit/test_dbt_source.py @@ -293,7 +293,7 @@ def test_dbt_entity_emission_configuration_helpers(): assert config.entities_enabled.can_emit_node_type("source") assert config.entities_enabled.can_emit_node_type("test") assert config.entities_enabled.can_emit_test_results - assert not config.entities_enabled.can_emit_model_performance + assert config.entities_enabled.can_emit_model_performance assert not config.entities_enabled.is_only_test_results() config_dict = { From 77c4629ccf18a65df3bbe4bf7352e7ae1b8e8b55 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 2 Apr 2024 09:36:44 -0500 Subject: [PATCH 09/17] refactor(docker): move to acryldata repo for all images (#9459) --- .../actions/docker-custom-build-and-push/action.yml | 2 +- .github/workflows/docker-unified.yml | 12 ++++++------ README.md | 2 +- build.gradle | 2 +- docker/README.md | 12 ++++++------ docker/docker-compose-with-cassandra.yml | 6 +++--- docker/docker-compose-without-neo4j.yml | 8 ++++---- docker/docker-compose.consumers-without-neo4j.yml | 4 ++-- docker/docker-compose.consumers.dev.yml | 4 ++-- docker/docker-compose.consumers.yml | 4 ++-- docker/docker-compose.dev.yml | 6 +++--- docker/docker-compose.yml | 8 ++++---- docker/ingestion/docker-compose.yml | 2 +- docker/profiles/docker-compose.frontend.yml | 4 ++-- docker/profiles/docker-compose.gms.yml | 12 ++++++------ docker/profiles/docker-compose.prerequisites.yml | 10 +++++----- docker/quickstart/docker-compose-m1.quickstart.yml | 8 ++++---- .../docker-compose-without-neo4j-m1.quickstart.yml | 8 ++++---- 
.../docker-compose-without-neo4j.quickstart.yml | 8 ++++---- ...er-compose.consumers-without-neo4j.quickstart.yml | 4 ++-- .../docker-compose.consumers.quickstart.yml | 4 ++-- docker/quickstart/docker-compose.quickstart.yml | 8 ++++---- docs/authentication/guides/add-users.md | 2 +- docs/authentication/guides/jaas.md | 4 ++-- .../guides/sso/configure-oidc-behind-proxy.md | 2 +- docs/deploy/aws.md | 4 ++-- docs/deploy/azure.md | 2 +- docs/docker/development.md | 8 ++++---- docs/how/extract-container-logs.md | 4 ++-- docs/troubleshooting/quickstart.md | 8 ++++---- .../src/datahub/ingestion/sink/datahub_rest.py | 2 +- .../src/datahub/telemetry/telemetry.py | 2 +- metadata-ingestion/src/datahub/upgrade/upgrade.py | 6 ++++-- .../linkedin/metadata/kafka/MclConsumerConfig.java | 2 +- .../linkedin/metadata/kafka/McpConsumerConfig.java | 2 +- .../main/java/com/datahub/gms/servlet/Config.java | 2 +- smoke-test/tests/read_only/test_services_up.py | 2 +- 37 files changed, 96 insertions(+), 94 deletions(-) diff --git a/.github/actions/docker-custom-build-and-push/action.yml b/.github/actions/docker-custom-build-and-push/action.yml index ca0796180cd57..3f8ea7a4c88eb 100644 --- a/.github/actions/docker-custom-build-and-push/action.yml +++ b/.github/actions/docker-custom-build-and-push/action.yml @@ -20,7 +20,7 @@ inputs: required: false images: - # e.g. linkedin/datahub-gms + # e.g. 
acryldata/datahub-gms description: "List of Docker images to use as base name for tags" required: true build-args: diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index 38ffa3484c0bf..5396e6f17cb97 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -17,12 +17,12 @@ concurrency: cancel-in-progress: true env: - DATAHUB_GMS_IMAGE: "linkedin/datahub-gms" - DATAHUB_FRONTEND_IMAGE: "linkedin/datahub-frontend-react" - DATAHUB_MAE_CONSUMER_IMAGE: "linkedin/datahub-mae-consumer" - DATAHUB_MCE_CONSUMER_IMAGE: "linkedin/datahub-mce-consumer" - DATAHUB_KAFKA_SETUP_IMAGE: "linkedin/datahub-kafka-setup" - DATAHUB_ELASTIC_SETUP_IMAGE: "linkedin/datahub-elasticsearch-setup" + DATAHUB_GMS_IMAGE: "acryldata/datahub-gms" + DATAHUB_FRONTEND_IMAGE: "acryldata/datahub-frontend-react" + DATAHUB_MAE_CONSUMER_IMAGE: "acryldata/datahub-mae-consumer" + DATAHUB_MCE_CONSUMER_IMAGE: "acryldata/datahub-mce-consumer" + DATAHUB_KAFKA_SETUP_IMAGE: "acryldata/datahub-kafka-setup" + DATAHUB_ELASTIC_SETUP_IMAGE: "acryldata/datahub-elasticsearch-setup" DATAHUB_MYSQL_SETUP_IMAGE: "acryldata/datahub-mysql-setup" DATAHUB_UPGRADE_IMAGE: "acryldata/datahub-upgrade" DATAHUB_INGESTION_BASE_IMAGE: "acryldata/datahub-ingestion-base" diff --git a/README.md b/README.md index 6b8fa520e432e..dddb32da73f23 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ HOSTED_DOCS_ONLY--> [![Version](https://img.shields.io/github/v/release/datahub-project/datahub?include_prereleases)](https://github.com/datahub-project/datahub/releases/latest) [![PyPI version](https://badge.fury.io/py/acryl-datahub.svg)](https://badge.fury.io/py/acryl-datahub) [![build & test](https://github.com/datahub-project/datahub/workflows/build%20&%20test/badge.svg?branch=master&event=push)](https://github.com/datahub-project/datahub/actions?query=workflow%3A%22build+%26+test%22+branch%3Amaster+event%3Apush) -[![Docker 
Pulls](https://img.shields.io/docker/pulls/linkedin/datahub-gms.svg)](https://hub.docker.com/r/linkedin/datahub-gms) +[![Docker Pulls](https://img.shields.io/docker/pulls/acryldata/datahub-gms.svg)](https://hub.docker.com/r/acryldata/datahub-gms) [![Slack](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://slack.datahubproject.io) [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](https://github.com/datahub-project/datahub/blob/master/docs/CONTRIBUTING.md) [![GitHub commit activity](https://img.shields.io/github/commit-activity/m/datahub-project/datahub)](https://github.com/datahub-project/datahub/pulls?q=is%3Apr) diff --git a/build.gradle b/build.gradle index 0d9c0f5dc18b0..5cf43755fceff 100644 --- a/build.gradle +++ b/build.gradle @@ -55,7 +55,7 @@ buildscript { ext.openLineageVersion = '1.5.0' ext.logbackClassicJava8 = '1.2.12' - ext.docker_registry = 'linkedin' + ext.docker_registry = 'acryldata' apply from: './repositories.gradle' buildscript.repositories.addAll(project.repositories) diff --git a/docker/README.md b/docker/README.md index 3510649707c65..ad847dc70cf3c 100644 --- a/docker/README.md +++ b/docker/README.md @@ -26,13 +26,13 @@ DataHub Docker Images: Do not use `latest` or `debug` tags for any of the image as those are not supported and present only due to legacy reasons. Please use `head` or tags specific for versions like `v0.8.40`. For production we recommend using version specific tags not `head`. 
* [acryldata/datahub-ingestion](https://hub.docker.com/r/acryldata/datahub-ingestion/) -* [linkedin/datahub-gms](https://hub.docker.com/repository/docker/linkedin/datahub-gms/) -* [linkedin/datahub-frontend-react](https://hub.docker.com/repository/docker/linkedin/datahub-frontend-react/) -* [linkedin/datahub-mae-consumer](https://hub.docker.com/repository/docker/linkedin/datahub-mae-consumer/) -* [linkedin/datahub-mce-consumer](https://hub.docker.com/repository/docker/linkedin/datahub-mce-consumer/) +* [acryldata/datahub-gms](https://hub.docker.com/repository/docker/acryldata/datahub-gms/) +* [acryldata/datahub-frontend-react](https://hub.docker.com/repository/docker/acryldata/datahub-frontend-react/) +* [acryldata/datahub-mae-consumer](https://hub.docker.com/repository/docker/acryldata/datahub-mae-consumer/) +* [acryldata/datahub-mce-consumer](https://hub.docker.com/repository/docker/acryldata/datahub-mce-consumer/) * [acryldata/datahub-upgrade](https://hub.docker.com/r/acryldata/datahub-upgrade/) -* [linkedin/datahub-kafka-setup](https://hub.docker.com/r/acryldata/datahub-kafka-setup/) -* [linkedin/datahub-elasticsearch-setup](https://hub.docker.com/r/linkedin/datahub-elasticsearch-setup/) +* [acryldata/datahub-kafka-setup](https://hub.docker.com/r/acryldata/datahub-kafka-setup/) +* [acryldata/datahub-elasticsearch-setup](https://hub.docker.com/r/acryldata/datahub-elasticsearch-setup/) * [acryldata/datahub-mysql-setup](https://hub.docker.com/r/acryldata/datahub-mysql-setup/) * [acryldata/datahub-postgres-setup](https://hub.docker.com/r/acryldata/datahub-postgres-setup/) * [acryldata/datahub-actions](https://hub.docker.com/r/acryldata/datahub-actions). Do not use `acryldata/acryl-datahub-actions` as that is deprecated and no longer used. 
diff --git a/docker/docker-compose-with-cassandra.yml b/docker/docker-compose-with-cassandra.yml index c99b6e97b4d80..d722b07b9a7af 100644 --- a/docker/docker-compose-with-cassandra.yml +++ b/docker/docker-compose-with-cassandra.yml @@ -8,7 +8,7 @@ version: '3.9' services: datahub-frontend-react: hostname: datahub-frontend-react - image: ${DATAHUB_FRONTEND_IMAGE:-linkedin/datahub-frontend-react}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_FRONTEND_IMAGE:-acryldata/datahub-frontend-react}:${DATAHUB_VERSION:-head} ports: - 9002:9002 build: @@ -32,7 +32,7 @@ services: condition: service_healthy datahub-gms: hostname: datahub-gms - image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_GMS_IMAGE:-acryldata/datahub-gms}:${DATAHUB_VERSION:-head} ports: - 8080:8080 build: @@ -85,7 +85,7 @@ services: # This "container" is a workaround to pre-create search indices elasticsearch-setup: hostname: elasticsearch-setup - image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-acryldata/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} build: context: ../ dockerfile: docker/elasticsearch-setup/Dockerfile diff --git a/docker/docker-compose-without-neo4j.yml b/docker/docker-compose-without-neo4j.yml index 0ea61e4be7281..eae36fb849fd5 100644 --- a/docker/docker-compose-without-neo4j.yml +++ b/docker/docker-compose-without-neo4j.yml @@ -8,7 +8,7 @@ version: '3.9' services: datahub-frontend-react: hostname: datahub-frontend-react - image: ${DATAHUB_FRONTEND_IMAGE:-linkedin/datahub-frontend-react}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_FRONTEND_IMAGE:-acryldata/datahub-frontend-react}:${DATAHUB_VERSION:-head} ports: - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 build: @@ -33,7 +33,7 @@ services: condition: service_healthy datahub-gms: hostname: datahub-gms - image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head} + image: 
${DATAHUB_GMS_IMAGE:-acryldata/datahub-gms}:${DATAHUB_VERSION:-head} ports: - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 build: @@ -73,7 +73,7 @@ services: # This "container" is a workaround to pre-create search indices elasticsearch-setup: hostname: elasticsearch-setup - image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-acryldata/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} build: context: ../ dockerfile: docker/elasticsearch-setup/Dockerfile @@ -88,7 +88,7 @@ services: datahub_setup_job: true kafka-setup: hostname: kafka-setup - image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_KAFKA_SETUP_IMAGE:-acryldata/datahub-kafka-setup}:${DATAHUB_VERSION:-head} build: dockerfile: ./docker/kafka-setup/Dockerfile context: ../ diff --git a/docker/docker-compose.consumers-without-neo4j.yml b/docker/docker-compose.consumers-without-neo4j.yml index b1c492c4c7df9..f1aa6b30cede0 100644 --- a/docker/docker-compose.consumers-without-neo4j.yml +++ b/docker/docker-compose.consumers-without-neo4j.yml @@ -7,7 +7,7 @@ services: - MCE_CONSUMER_ENABLED=false datahub-mae-consumer: hostname: datahub-mae-consumer - image: ${DATAHUB_MAE_CONSUMER_IMAGE:-linkedin/datahub-mae-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MAE_CONSUMER_IMAGE:-acryldata/datahub-mae-consumer}:${DATAHUB_VERSION:-head} ports: - 9091:9091 build: @@ -19,7 +19,7 @@ services: - KAFKA_CONSUMER_HEALTH_CHECK_ENABLED=${KAFKA_CONSUMER_HEALTH_CHECK_ENABLED:-true} datahub-mce-consumer: hostname: datahub-mce-consumer - image: ${DATAHUB_MCE_CONSUMER_IMAGE:-linkedin/datahub-mce-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MCE_CONSUMER_IMAGE:-acryldata/datahub-mce-consumer}:${DATAHUB_VERSION:-head} ports: - 9090:9090 build: diff --git a/docker/docker-compose.consumers.dev.yml b/docker/docker-compose.consumers.dev.yml index 5c272a9cf9b8a..00f7b52df151f 100644 --- 
a/docker/docker-compose.consumers.dev.yml +++ b/docker/docker-compose.consumers.dev.yml @@ -1,7 +1,7 @@ version: '3.9' services: datahub-mae-consumer: - image: linkedin/datahub-mae-consumer:debug + image: acryldata/datahub-mae-consumer:debug build: context: ../ dockerfile: docker/datahub-mae-consumer/Dockerfile @@ -13,7 +13,7 @@ services: - ../metadata-jobs/mae-consumer-job/build/libs/:/datahub/datahub-mae-consumer/bin/ - ./monitoring/client-prometheus-config.yaml:/datahub/datahub-mae-consumer/scripts/prometheus-config.yaml datahub-mce-consumer: - image: linkedin/datahub-mce-consumer:debug + image: acryldata/datahub-mce-consumer:debug build: context: ../ dockerfile: docker/datahub-mce-consumer/Dockerfile diff --git a/docker/docker-compose.consumers.yml b/docker/docker-compose.consumers.yml index 977e29b9a4abc..74b9adaeb9948 100644 --- a/docker/docker-compose.consumers.yml +++ b/docker/docker-compose.consumers.yml @@ -7,7 +7,7 @@ services: - MCE_CONSUMER_ENABLED=false datahub-mae-consumer: hostname: datahub-mae-consumer - image: ${DATAHUB_MAE_CONSUMER_IMAGE:-linkedin/datahub-mae-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MAE_CONSUMER_IMAGE:-acryldata/datahub-mae-consumer}:${DATAHUB_VERSION:-head} ports: - 9091:9091 build: @@ -22,7 +22,7 @@ services: condition: service_healthy datahub-mce-consumer: hostname: datahub-mce-consumer - image: ${DATAHUB_MCE_CONSUMER_IMAGE:-linkedin/datahub-mce-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MCE_CONSUMER_IMAGE:-acryldata/datahub-mce-consumer}:${DATAHUB_VERSION:-head} ports: - 9090:9090 build: diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml index c7a3c5098d940..b6ac43a9eda43 100644 --- a/docker/docker-compose.dev.yml +++ b/docker/docker-compose.dev.yml @@ -11,7 +11,7 @@ version: '3.9' services: datahub-frontend-react: - image: linkedin/datahub-frontend-react:head + image: acryldata/datahub-frontend-react:head ports: - ${DATAHUB_MAPPED_FRONTEND_DEBUG_PORT:-5002}:5002 - 
${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 @@ -27,7 +27,7 @@ services: - ../datahub-frontend/build/stage/main:/datahub-frontend - ./monitoring/client-prometheus-config.yaml:/datahub-frontend/client-prometheus-config.yaml datahub-gms: - image: linkedin/datahub-gms:debug + image: acryldata/datahub-gms:debug ports: - ${DATAHUB_MAPPED_GMS_DEBUG_PORT:-5001}:5001 - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 @@ -76,7 +76,7 @@ services: - ${HOME}/.datahub/plugins:/etc/datahub/plugins # Pre-creates the search indices using local mapping/settings.json elasticsearch-setup: - image: linkedin/datahub-elasticsearch-setup:head + image: acryldata/datahub-elasticsearch-setup:head build: context: elasticsearch-setup dockerfile: Dockerfile diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 79cd72a487a37..96f37496859a4 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -8,7 +8,7 @@ version: '3.9' services: datahub-frontend-react: hostname: datahub-frontend-react - image: ${DATAHUB_FRONTEND_IMAGE:-linkedin/datahub-frontend-react}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_FRONTEND_IMAGE:-acryldata/datahub-frontend-react}:${DATAHUB_VERSION:-head} ports: - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 build: @@ -32,7 +32,7 @@ services: condition: service_healthy datahub-gms: hostname: datahub-gms - image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_GMS_IMAGE:-acryldata/datahub-gms}:${DATAHUB_VERSION:-head} environment: - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true} ports: @@ -75,7 +75,7 @@ services: # This "container" is a workaround to pre-create search indices elasticsearch-setup: hostname: elasticsearch-setup - image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-acryldata/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} build: context: ../ dockerfile: 
docker/elasticsearch-setup/Dockerfile @@ -93,7 +93,7 @@ services: # explicitly wait for this container kafka-setup: hostname: kafka-setup - image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_KAFKA_SETUP_IMAGE:-acryldata/datahub-kafka-setup}:${DATAHUB_VERSION:-head} build: dockerfile: ./docker/kafka-setup/Dockerfile context: ../ diff --git a/docker/ingestion/docker-compose.yml b/docker/ingestion/docker-compose.yml index 2ba6872c0313a..06d4e47aa4a40 100644 --- a/docker/ingestion/docker-compose.yml +++ b/docker/ingestion/docker-compose.yml @@ -5,7 +5,7 @@ services: build: context: ../../ dockerfile: docker/datahub-ingestion/Dockerfile - image: linkedin/datahub-ingestion:${DATAHUB_VERSION:-head} + image: acryldata/datahub-ingestion:${DATAHUB_VERSION:-head} hostname: ingestion command: "ingest -c /sample_recipe.yml" volumes: diff --git a/docker/profiles/docker-compose.frontend.yml b/docker/profiles/docker-compose.frontend.yml index 4b2e7417fa61c..345493ba51650 100644 --- a/docker/profiles/docker-compose.frontend.yml +++ b/docker/profiles/docker-compose.frontend.yml @@ -1,7 +1,7 @@ x-datahub-frontend-service: &datahub-frontend-service hostname: datahub-frontend-react - image: ${DATAHUB_FRONTEND_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-frontend-react}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_FRONTEND_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-frontend-react}:${DATAHUB_VERSION:-head} ports: - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 env_file: @@ -15,7 +15,7 @@ x-datahub-frontend-service: &datahub-frontend-service x-datahub-frontend-service-dev: &datahub-frontend-service-dev <<: *datahub-frontend-service - image: ${DATAHUB_FRONTEND_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-frontend-react}:debug + image: ${DATAHUB_FRONTEND_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-frontend-react}:debug ports: - ${DATAHUB_MAPPED_FRONTEND_DEBUG_PORT:-5002}:5002 - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 diff --git 
a/docker/profiles/docker-compose.gms.yml b/docker/profiles/docker-compose.gms.yml index 961bd4464af95..e9baa65290e50 100644 --- a/docker/profiles/docker-compose.gms.yml +++ b/docker/profiles/docker-compose.gms.yml @@ -90,7 +90,7 @@ x-datahub-system-update-service-dev: &datahub-system-update-service-dev ################################# x-datahub-gms-service: &datahub-gms-service hostname: datahub-gms - image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-gms}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:${DATAHUB_VERSION:-head} ports: - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 env_file: @@ -112,7 +112,7 @@ x-datahub-gms-service: &datahub-gms-service x-datahub-gms-service-dev: &datahub-gms-service-dev <<: *datahub-gms-service - image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-gms}:debug + image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:debug ports: - ${DATAHUB_MAPPED_GMS_DEBUG_PORT:-5001}:5001 - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 @@ -137,7 +137,7 @@ x-datahub-gms-service-dev: &datahub-gms-service-dev ################################# x-datahub-mae-consumer-service: &datahub-mae-consumer-service hostname: datahub-mae-consumer - image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-mae-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:${DATAHUB_VERSION:-head} ports: - 9091:9091 env_file: @@ -149,7 +149,7 @@ x-datahub-mae-consumer-service: &datahub-mae-consumer-service x-datahub-mae-consumer-service-dev: &datahub-mae-consumer-service-dev <<: *datahub-mae-consumer-service - image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-mae-consumer}:debug + image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:debug environment: <<: [*datahub-dev-telemetry-env, *datahub-mae-consumer-env] volumes: @@ -163,7 +163,7 @@ x-datahub-mae-consumer-service-dev: 
&datahub-mae-consumer-service-dev ################################# x-datahub-mce-consumer-service: &datahub-mce-consumer-service hostname: datahub-mce-consumer - image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-mce-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:${DATAHUB_VERSION:-head} ports: - 9090:9090 env_file: @@ -175,7 +175,7 @@ x-datahub-mce-consumer-service: &datahub-mce-consumer-service x-datahub-mce-consumer-service-dev: &datahub-mce-consumer-service-dev <<: *datahub-mce-consumer-service - image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-mce-consumer}:debug + image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:debug environment: <<: [*datahub-dev-telemetry-env, *datahub-mce-consumer-env] volumes: diff --git a/docker/profiles/docker-compose.prerequisites.yml b/docker/profiles/docker-compose.prerequisites.yml index 7b1f6b8c99c0e..8de220093dda5 100644 --- a/docker/profiles/docker-compose.prerequisites.yml +++ b/docker/profiles/docker-compose.prerequisites.yml @@ -256,7 +256,7 @@ services: kafka-setup: &kafka-setup profiles: *profiles-quickstart hostname: kafka-setup - image: ${DATAHUB_KAFKA_SETUP_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-kafka-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_KAFKA_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-kafka-setup}:${DATAHUB_VERSION:-head} env_file: kafka-setup/env/docker.env environment: &kafka-setup-env DATAHUB_PRECREATE_TOPICS: ${DATAHUB_PRECREATE_TOPICS:-false} @@ -273,7 +273,7 @@ services: environment: <<: *kafka-setup-env DATAHUB_PRECREATE_TOPICS: ${DATAHUB_PRECREATE_TOPICS:-true} - image: ${DATAHUB_KAFKA_SETUP_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-kafka-setup}:debug + image: ${DATAHUB_KAFKA_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-kafka-setup}:debug elasticsearch: profiles: *elasticsearch-profiles hostname: search @@ -297,7 +297,7 @@ services: 
volumes: - esdata:/usr/share/elasticsearch/data elasticsearch-setup-dev: &elasticsearch-setup-dev - image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-elasticsearch-setup}:debug + image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-elasticsearch-setup}:debug profiles: *elasticsearch-profiles hostname: elasticsearch-setup env_file: elasticsearch-setup/env/docker.env @@ -335,7 +335,7 @@ services: <<: *elasticsearch-setup-dev profiles: *opensearch-profiles-quickstart hostname: opensearch-setup - image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} environment: <<: *search-datastore-environment USE_AWS_ELASTICSEARCH: ${USE_AWS_ELASTICSEARCH:-true} @@ -348,7 +348,7 @@ services: <<: *opensearch-setup profiles: *opensearch-profiles-dev hostname: opensearch-setup-dev - image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-elasticsearch-setup}:debug + image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-elasticsearch-setup}:debug environment: <<: *search-datastore-environment USE_AWS_ELASTICSEARCH: ${USE_AWS_ELASTICSEARCH:-true} diff --git a/docker/quickstart/docker-compose-m1.quickstart.yml b/docker/quickstart/docker-compose-m1.quickstart.yml index 50f0c90c83175..d2ac2f151fcbb 100644 --- a/docker/quickstart/docker-compose-m1.quickstart.yml +++ b/docker/quickstart/docker-compose-m1.quickstart.yml @@ -64,7 +64,7 @@ services: - ELASTIC_CLIENT_HOST=elasticsearch - ELASTIC_CLIENT_PORT=9200 hostname: datahub-frontend-react - image: ${DATAHUB_FRONTEND_IMAGE:-linkedin/datahub-frontend-react}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_FRONTEND_IMAGE:-acryldata/datahub-frontend-react}:${DATAHUB_VERSION:-head} ports: - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 volumes: @@ -111,7 +111,7 @@ services: test: curl -sS --fail 
http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health timeout: 5s hostname: datahub-gms - image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_GMS_IMAGE:-acryldata/datahub-gms}:${DATAHUB_VERSION:-head} ports: - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 volumes: @@ -185,7 +185,7 @@ services: - ELASTICSEARCH_PORT=9200 - ELASTICSEARCH_PROTOCOL=http hostname: elasticsearch-setup - image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-acryldata/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} labels: datahub_setup_job: true kafka-setup: @@ -200,7 +200,7 @@ services: - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 - USE_CONFLUENT_SCHEMA_REGISTRY=TRUE hostname: kafka-setup - image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_KAFKA_SETUP_IMAGE:-acryldata/datahub-kafka-setup}:${DATAHUB_VERSION:-head} labels: datahub_setup_job: true mysql: diff --git a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml index 9608b4383ab5d..1ba467d7fb928 100644 --- a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml @@ -64,7 +64,7 @@ services: - ELASTIC_CLIENT_HOST=elasticsearch - ELASTIC_CLIENT_PORT=9200 hostname: datahub-frontend-react - image: ${DATAHUB_FRONTEND_IMAGE:-linkedin/datahub-frontend-react}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_FRONTEND_IMAGE:-acryldata/datahub-frontend-react}:${DATAHUB_VERSION:-head} ports: - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 volumes: @@ -106,7 +106,7 @@ services: test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health timeout: 5s hostname: datahub-gms - image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head} + image: 
${DATAHUB_GMS_IMAGE:-acryldata/datahub-gms}:${DATAHUB_VERSION:-head} ports: - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 volumes: @@ -178,7 +178,7 @@ services: - ELASTICSEARCH_PORT=9200 - ELASTICSEARCH_PROTOCOL=http hostname: elasticsearch-setup - image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-acryldata/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} labels: datahub_setup_job: true kafka-setup: @@ -193,7 +193,7 @@ services: - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 - USE_CONFLUENT_SCHEMA_REGISTRY=TRUE hostname: kafka-setup - image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_KAFKA_SETUP_IMAGE:-acryldata/datahub-kafka-setup}:${DATAHUB_VERSION:-head} labels: datahub_setup_job: true mysql: diff --git a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml index 176e5539aa491..893af253095bf 100644 --- a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml @@ -64,7 +64,7 @@ services: - ELASTIC_CLIENT_HOST=elasticsearch - ELASTIC_CLIENT_PORT=9200 hostname: datahub-frontend-react - image: ${DATAHUB_FRONTEND_IMAGE:-linkedin/datahub-frontend-react}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_FRONTEND_IMAGE:-acryldata/datahub-frontend-react}:${DATAHUB_VERSION:-head} ports: - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 volumes: @@ -106,7 +106,7 @@ services: test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health timeout: 5s hostname: datahub-gms - image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_GMS_IMAGE:-acryldata/datahub-gms}:${DATAHUB_VERSION:-head} ports: - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 volumes: @@ -178,7 +178,7 @@ services: - ELASTICSEARCH_PORT=9200 - ELASTICSEARCH_PROTOCOL=http hostname: 
elasticsearch-setup - image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-acryldata/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} labels: datahub_setup_job: true kafka-setup: @@ -193,7 +193,7 @@ services: - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 - USE_CONFLUENT_SCHEMA_REGISTRY=TRUE hostname: kafka-setup - image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_KAFKA_SETUP_IMAGE:-acryldata/datahub-kafka-setup}:${DATAHUB_VERSION:-head} labels: datahub_setup_job: true mysql: diff --git a/docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml index f61bb53d72ecc..a4211acedcf10 100644 --- a/docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml +++ b/docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml @@ -20,7 +20,7 @@ services: - GRAPH_SERVICE_IMPL=elasticsearch - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml hostname: datahub-mae-consumer - image: ${DATAHUB_MAE_CONSUMER_IMAGE:-linkedin/datahub-mae-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MAE_CONSUMER_IMAGE:-acryldata/datahub-mae-consumer}:${DATAHUB_VERSION:-head} ports: - 9091:9091 datahub-mce-consumer: @@ -52,7 +52,7 @@ services: - PE_CONSUMER_ENABLED=false - UI_INGESTION_ENABLED=false hostname: datahub-mce-consumer - image: ${DATAHUB_MCE_CONSUMER_IMAGE:-linkedin/datahub-mce-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MCE_CONSUMER_IMAGE:-acryldata/datahub-mce-consumer}:${DATAHUB_VERSION:-head} ports: - 9090:9090 version: '3.9' diff --git a/docker/quickstart/docker-compose.consumers.quickstart.yml b/docker/quickstart/docker-compose.consumers.quickstart.yml index 3ceb5d537ffd8..e7571e4baf8b4 100644 --- a/docker/quickstart/docker-compose.consumers.quickstart.yml +++ 
b/docker/quickstart/docker-compose.consumers.quickstart.yml @@ -27,7 +27,7 @@ services: - GRAPH_SERVICE_IMPL=neo4j - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml hostname: datahub-mae-consumer - image: ${DATAHUB_MAE_CONSUMER_IMAGE:-linkedin/datahub-mae-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MAE_CONSUMER_IMAGE:-acryldata/datahub-mae-consumer}:${DATAHUB_VERSION:-head} ports: - 9091:9091 datahub-mce-consumer: @@ -66,7 +66,7 @@ services: - PE_CONSUMER_ENABLED=false - UI_INGESTION_ENABLED=false hostname: datahub-mce-consumer - image: ${DATAHUB_MCE_CONSUMER_IMAGE:-linkedin/datahub-mce-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MCE_CONSUMER_IMAGE:-acryldata/datahub-mce-consumer}:${DATAHUB_VERSION:-head} ports: - 9090:9090 version: '3.9' diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml index e39695f52a437..f3490ce502626 100644 --- a/docker/quickstart/docker-compose.quickstart.yml +++ b/docker/quickstart/docker-compose.quickstart.yml @@ -64,7 +64,7 @@ services: - ELASTIC_CLIENT_HOST=elasticsearch - ELASTIC_CLIENT_PORT=9200 hostname: datahub-frontend-react - image: ${DATAHUB_FRONTEND_IMAGE:-linkedin/datahub-frontend-react}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_FRONTEND_IMAGE:-acryldata/datahub-frontend-react}:${DATAHUB_VERSION:-head} ports: - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 volumes: @@ -111,7 +111,7 @@ services: test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health timeout: 5s hostname: datahub-gms - image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_GMS_IMAGE:-acryldata/datahub-gms}:${DATAHUB_VERSION:-head} ports: - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 volumes: @@ -185,7 +185,7 @@ services: - ELASTICSEARCH_PORT=9200 - ELASTICSEARCH_PROTOCOL=http hostname: elasticsearch-setup - image: 
${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-acryldata/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} labels: datahub_setup_job: true kafka-setup: @@ -200,7 +200,7 @@ services: - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 - USE_CONFLUENT_SCHEMA_REGISTRY=TRUE hostname: kafka-setup - image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_KAFKA_SETUP_IMAGE:-acryldata/datahub-kafka-setup}:${DATAHUB_VERSION:-head} labels: datahub_setup_job: true mysql: diff --git a/docs/authentication/guides/add-users.md b/docs/authentication/guides/add-users.md index d380cacd6665e..86dac3ea328e5 100644 --- a/docs/authentication/guides/add-users.md +++ b/docs/authentication/guides/add-users.md @@ -134,7 +134,7 @@ For example, to mount a user.props file that is stored on my local filesystem at build: context: ../ dockerfile: docker/datahub-frontend/Dockerfile - image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head} + image: acryldata/datahub-frontend-react:${DATAHUB_VERSION:-head} ..... 
# The new stuff volumes: diff --git a/docs/authentication/guides/jaas.md b/docs/authentication/guides/jaas.md index 6268d608f4926..42a87a781bd00 100644 --- a/docs/authentication/guides/jaas.md +++ b/docs/authentication/guides/jaas.md @@ -29,7 +29,7 @@ datahub-frontend-react: build: context: ../ dockerfile: docker/datahub-frontend/Dockerfile - image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head} + image: acryldata/datahub-frontend-react:${DATAHUB_VERSION:-head} env_file: datahub-frontend/env/docker.env hostname: datahub-frontend-react container_name: datahub-frontend-react @@ -56,7 +56,7 @@ datahub-frontend-react: build: context: ../ dockerfile: docker/datahub-frontend/Dockerfile - image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head} + image: acryldata/datahub-frontend-react:${DATAHUB_VERSION:-head} env_file: datahub-frontend/env/docker.env hostname: datahub-frontend-react container_name: datahub-frontend-react diff --git a/docs/authentication/guides/sso/configure-oidc-behind-proxy.md b/docs/authentication/guides/sso/configure-oidc-behind-proxy.md index 684bf768f2baf..c00f43228ea1e 100644 --- a/docs/authentication/guides/sso/configure-oidc-behind-proxy.md +++ b/docs/authentication/guides/sso/configure-oidc-behind-proxy.md @@ -34,7 +34,7 @@ To build a custom image for your frontend, with the certificates built-in, you c Example Dockerfile: ```dockerfile -FROM linkedin/datahub-frontend-react: +FROM acryldata/datahub-frontend-react: COPY /truststore-directory /certificates ``` diff --git a/docs/deploy/aws.md b/docs/deploy/aws.md index d060eddd9acc8..355ed41467008 100644 --- a/docs/deploy/aws.md +++ b/docs/deploy/aws.md @@ -137,7 +137,7 @@ file used to deploy datahub). Change datahub-frontend values to the following. 
datahub-frontend: enabled: true image: - repository: linkedin/datahub-frontend-react + repository: acryldata/datahub-frontend-react tag: "latest" ingress: enabled: true @@ -305,7 +305,7 @@ a different way of creating time based indices. elasticsearchSetupJob: enabled: true image: - repository: linkedin/datahub-elasticsearch-setup + repository: acryldata/datahub-elasticsearch-setup tag: "***" extraEnvs: - name: USE_AWS_ELASTICSEARCH diff --git a/docs/deploy/azure.md b/docs/deploy/azure.md index b940b82827e94..6ddd5fc5ba1d6 100644 --- a/docs/deploy/azure.md +++ b/docs/deploy/azure.md @@ -165,7 +165,7 @@ In order to use the ingress controller to expose frontend pod, we need to update datahub-frontend: enabled: true image: - repository: linkedin/datahub-frontend-react + repository: acryldata/datahub-frontend-react # tag: "v0.10.0 # defaults to .global.datahub.version # Set up ingress to expose react front-end diff --git a/docs/docker/development.md b/docs/docker/development.md index 91a303744a03b..35c708a4ac490 100644 --- a/docs/docker/development.md +++ b/docs/docker/development.md @@ -30,12 +30,12 @@ containers with remote debugging ports enabled. 
Once the `debug` docker images are constructed you'll see images similar to the following: ```shell -linkedin/datahub-frontend-react debug e52fef698025 28 minutes ago 763MB -linkedin/datahub-kafka-setup debug 3375aaa2b12d 55 minutes ago 659MB -linkedin/datahub-gms debug ea2b0a8ea115 56 minutes ago 408MB +acryldata/datahub-frontend-react debug e52fef698025 28 minutes ago 763MB +acryldata/datahub-kafka-setup debug 3375aaa2b12d 55 minutes ago 659MB +acryldata/datahub-gms debug ea2b0a8ea115 56 minutes ago 408MB acryldata/datahub-upgrade debug 322377a7a21d 56 minutes ago 463MB acryldata/datahub-mysql-setup debug 17768edcc3e5 2 hours ago 58.2MB -linkedin/datahub-elasticsearch-setup debug 4d935be7c62c 2 hours ago 26.1MB +acryldata/datahub-elasticsearch-setup debug 4d935be7c62c 2 hours ago 26.1MB ``` At this point it is possible to view the DataHub UI at `http://localhost:9002` as you normally would with quickstart. diff --git a/docs/how/extract-container-logs.md b/docs/how/extract-container-logs.md index b5fbb4c83cc64..d702a0acc9123 100644 --- a/docs/how/extract-container-logs.md +++ b/docs/how/extract-container-logs.md @@ -15,8 +15,8 @@ To do so, you can view all containers that Docker knows about by running the fol ``` johnjoyce@Johns-MBP datahub-fork % docker container ls CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES -6c4a280bc457 linkedin/datahub-frontend-react "datahub-frontend/bi…" 5 days ago Up 46 hours (healthy) 0.0.0.0:9002->9002/tcp datahub-frontend-react -122a2488ab63 linkedin/datahub-gms "/bin/sh -c /datahub…" 5 days ago Up 5 days (healthy) 0.0.0.0:8080->8080/tcp datahub-gms +6c4a280bc457 acryldata/datahub-frontend-react "datahub-frontend/bi…" 5 days ago Up 46 hours (healthy) 0.0.0.0:9002->9002/tcp datahub-frontend-react +122a2488ab63 acryldata/datahub-gms "/bin/sh -c /datahub…" 5 days ago Up 5 days (healthy) 0.0.0.0:8080->8080/tcp datahub-gms 7682dcc64afa confluentinc/cp-schema-registry:5.4.0 "/etc/confluent/dock…" 5 days ago Up 5 days 
0.0.0.0:8081->8081/tcp schema-registry 3680fcaef3ed confluentinc/cp-kafka:5.4.0 "/etc/confluent/dock…" 5 days ago Up 5 days 0.0.0.0:9092->9092/tcp, 0.0.0.0:29092->29092/tcp broker 9d6730ddd4c4 neo4j:4.0.6 "/sbin/tini -g -- /d…" 5 days ago Up 5 days 0.0.0.0:7474->7474/tcp, 7473/tcp, 0.0.0.0:7687->7687/tcp neo4j diff --git a/docs/troubleshooting/quickstart.md b/docs/troubleshooting/quickstart.md index 0392ffc426a6c..0dfe11179083c 100644 --- a/docs/troubleshooting/quickstart.md +++ b/docs/troubleshooting/quickstart.md @@ -88,10 +88,10 @@ You can list all Docker containers in your local by running `docker container ls ``` CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES -979830a342ce linkedin/datahub-mce-consumer:latest "bash -c 'while ping…" 10 hours ago Up 10 hours datahub-mce-consumer -3abfc72e205d linkedin/datahub-frontend-react:latest "datahub-frontend…" 10 hours ago Up 10 hours 0.0.0.0:9002->9002/tcp datahub-frontend -50b2308a8efd linkedin/datahub-mae-consumer:latest "bash -c 'while ping…" 10 hours ago Up 10 hours datahub-mae-consumer -4d6b03d77113 linkedin/datahub-gms:latest "bash -c 'dockerize …" 10 hours ago Up 10 hours 0.0.0.0:8080->8080/tcp datahub-gms +979830a342ce acryldata/datahub-mce-consumer:latest "bash -c 'while ping…" 10 hours ago Up 10 hours datahub-mce-consumer +3abfc72e205d acryldata/datahub-frontend-react:latest "datahub-frontend…" 10 hours ago Up 10 hours 0.0.0.0:9002->9002/tcp datahub-frontend +50b2308a8efd acryldata/datahub-mae-consumer:latest "bash -c 'while ping…" 10 hours ago Up 10 hours datahub-mae-consumer +4d6b03d77113 acryldata/datahub-gms:latest "bash -c 'dockerize …" 10 hours ago Up 10 hours 0.0.0.0:8080->8080/tcp datahub-gms c267c287a235 landoop/schema-registry-ui:latest "/run.sh" 10 hours ago Up 10 hours 0.0.0.0:8000->8000/tcp schema-registry-ui 4b38899cc29a confluentinc/cp-schema-registry:5.2.1 "/etc/confluent/dock…" 10 hours ago Up 10 hours 0.0.0.0:8081->8081/tcp schema-registry 37c29781a263 confluentinc/cp-kafka:5.2.1 
"/etc/confluent/dock…" 10 hours ago Up 10 hours 0.0.0.0:9092->9092/tcp, 0.0.0.0:29092->29092/tcp broker diff --git a/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py b/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py index a37f6ad8d279e..007b7487cb6a4 100644 --- a/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py +++ b/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py @@ -104,7 +104,7 @@ def __post_init__(self) -> None: self.report.gms_version = ( gms_config.get("versions", {}) - .get("linkedin/datahub", {}) + .get("acryldata/datahub", {}) .get("version", "") ) self.report.max_threads = self.config.max_threads diff --git a/metadata-ingestion/src/datahub/telemetry/telemetry.py b/metadata-ingestion/src/datahub/telemetry/telemetry.py index a802125e76b4e..08df9e80ecf29 100644 --- a/metadata-ingestion/src/datahub/telemetry/telemetry.py +++ b/metadata-ingestion/src/datahub/telemetry/telemetry.py @@ -335,7 +335,7 @@ def _server_props(self, server: Optional[DataHubGraph]) -> Dict[str, str]: "serverType", "missing" ), "server_version": server.server_config.get("versions", {}) - .get("linkedin/datahub", {}) + .get("acryldata/datahub", {}) .get("version", "missing"), "server_id": server.server_id or "missing", } diff --git a/metadata-ingestion/src/datahub/upgrade/upgrade.py b/metadata-ingestion/src/datahub/upgrade/upgrade.py index 075bfd29008f6..446f1a05b71a6 100644 --- a/metadata-ingestion/src/datahub/upgrade/upgrade.py +++ b/metadata-ingestion/src/datahub/upgrade/upgrade.py @@ -139,10 +139,12 @@ async def get_server_version_stats( current_server_release_date = None if server_config: server_version_string = ( - server_config.get("versions", {}).get("linkedin/datahub", {}).get("version") + server_config.get("versions", {}) + .get("acryldata/datahub", {}) + .get("version") ) commit_hash = ( - server_config.get("versions", {}).get("linkedin/datahub", {}).get("commit") + server_config.get("versions", {}).get("acryldata/datahub", 
{}).get("commit") ) server_type = server_config.get("datahub", {}).get("serverType", "unknown") if server_type == "quickstart" and commit_hash: diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MclConsumerConfig.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MclConsumerConfig.java index 686e2a816ffb5..280ca87d1cf04 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MclConsumerConfig.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MclConsumerConfig.java @@ -39,7 +39,7 @@ public MclConsumerConfig(GitVersion gitVersion) throws JsonProcessingException { config.put("noCode", "true"); Map versionConfig = new HashMap<>(); - versionConfig.put("linkedin/datahub", gitVersion.toConfig()); + versionConfig.put("acryldata/datahub", gitVersion.toConfig()); config.put("versions", versionConfig); configJson = OBJECT_MAPPER.writerWithDefaultPrettyPrinter().writeValueAsString(config); } diff --git a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/McpConsumerConfig.java b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/McpConsumerConfig.java index b04ecc7761eb6..3db6dfa79516e 100644 --- a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/McpConsumerConfig.java +++ b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/McpConsumerConfig.java @@ -39,7 +39,7 @@ public McpConsumerConfig(GitVersion gitVersion) throws JsonProcessingException { config.put("noCode", "true"); Map versionConfig = new HashMap<>(); - versionConfig.put("linkedin/datahub", gitVersion.toConfig()); + versionConfig.put("acryldata/datahub", gitVersion.toConfig()); config.put("versions", versionConfig); configJson = OBJECT_MAPPER.writerWithDefaultPrettyPrinter().writeValueAsString(config); } diff --git a/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/Config.java 
b/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/Config.java index cfa4c6425c131..33015c4adbec5 100644 --- a/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/Config.java +++ b/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/Config.java @@ -97,7 +97,7 @@ protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws IO GitVersion version = getGitVersion(ctx); Map versionConfig = new HashMap<>(); - versionConfig.put("linkedin/datahub", version.toConfig()); + versionConfig.put("acryldata/datahub", version.toConfig()); config.put("versions", versionConfig); ConfigurationProvider configProvider = getConfigProvider(ctx); diff --git a/smoke-test/tests/read_only/test_services_up.py b/smoke-test/tests/read_only/test_services_up.py index 4e00f910ceb73..1fd43f884323c 100644 --- a/smoke-test/tests/read_only/test_services_up.py +++ b/smoke-test/tests/read_only/test_services_up.py @@ -25,7 +25,7 @@ def test_gms_config_accessible() -> None: assert gms_config is not None if DATAHUB_VERSION is not None: - assert gms_config["versions"]["linkedin/datahub"]["version"] == DATAHUB_VERSION + assert gms_config["versions"]["acryldata/datahub"]["version"] == DATAHUB_VERSION else: print("[WARN] TEST_DATAHUB_VERSION is not set") From 3671860c58ca8339f05c478d9a1cbb6860351b50 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 2 Apr 2024 10:46:01 -0500 Subject: [PATCH 10/17] fix(github): fix docker publish (#10186) --- .github/workflows/docker-unified.yml | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index 5396e6f17cb97..5e1409003c476 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -73,7 +73,7 @@ jobs: - name: Check whether publishing enabled id: publish env: - ENABLE_PUBLISH: ${{ secrets.DOCKER_PASSWORD != '' 
&& secrets.ACRYL_DOCKER_PASSWORD != '' }} + ENABLE_PUBLISH: ${{ secrets.ACRYL_DOCKER_PASSWORD != '' }} run: | echo "Enable publish: ${{ env.ENABLE_PUBLISH }}" echo "publish=${{ env.ENABLE_PUBLISH }}" >> $GITHUB_OUTPUT @@ -127,8 +127,8 @@ jobs: images: | ${{ env.DATAHUB_GMS_IMAGE }} tags: ${{ needs.setup.outputs.tag }} - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} + username: ${{ secrets.ACRYL_DOCKER_USERNAME }} + password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish }} context: . file: ./docker/datahub-gms/Dockerfile @@ -191,8 +191,8 @@ jobs: images: | ${{ env.DATAHUB_MAE_CONSUMER_IMAGE }} tags: ${{ needs.setup.outputs.tag }} - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} + username: ${{ secrets.ACRYL_DOCKER_USERNAME }} + password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish }} context: . file: ./docker/datahub-mae-consumer/Dockerfile @@ -255,8 +255,8 @@ jobs: images: | ${{ env.DATAHUB_MCE_CONSUMER_IMAGE }} tags: ${{ needs.setup.outputs.tag }} - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} + username: ${{ secrets.ACRYL_DOCKER_USERNAME }} + password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish }} context: . file: ./docker/datahub-mce-consumer/Dockerfile @@ -385,8 +385,8 @@ jobs: images: | ${{ env.DATAHUB_FRONTEND_IMAGE }} tags: ${{ needs.setup.outputs.tag }} - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} + username: ${{ secrets.ACRYL_DOCKER_USERNAME }} + password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish }} context: . 
file: ./docker/datahub-frontend/Dockerfile @@ -439,8 +439,8 @@ jobs: images: | ${{ env.DATAHUB_KAFKA_SETUP_IMAGE }} tags: ${{ needs.setup.outputs.tag }} - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} + username: ${{ secrets.ACRYL_DOCKER_USERNAME }} + password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish }} context: . file: ./docker/kafka-setup/Dockerfile @@ -481,8 +481,8 @@ jobs: images: | ${{ env.DATAHUB_ELASTIC_SETUP_IMAGE }} tags: ${{ needs.setup.outputs.tag }} - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} + username: ${{ secrets.ACRYL_DOCKER_USERNAME }} + password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish }} context: . file: ./docker/elasticsearch-setup/Dockerfile From a89e189e93db89616517ff2dc61bf2478fc592c8 Mon Sep 17 00:00:00 2001 From: RyanHolstien Date: Tue, 2 Apr 2024 10:48:17 -0500 Subject: [PATCH 11/17] feat(lineage): mark nodes as explored (#10180) --- .../UrnSearchAcrossLineageResultsMapper.java | 4 +++- .../src/main/resources/search.graphql | 9 ++++++-- .../graph/elastic/ESGraphQueryDAO.java | 19 +++++++++++---- .../metadata/search/LineageSearchService.java | 1 + .../search/SearchGraphServiceTestBase.java | 23 +++++++++++++++++++ .../metadata/graph/LineageRelationship.pdl | 5 ++++ .../metadata/search/LineageSearchEntity.pdl | 5 ++++ ...com.linkedin.entity.entities.snapshot.json | 5 ++++ ...nkedin.lineage.relationships.snapshot.json | 5 ++++ 9 files changed, 69 insertions(+), 7 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/UrnSearchAcrossLineageResultsMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/UrnSearchAcrossLineageResultsMapper.java index b39b960bb7580..b85303909c080 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/UrnSearchAcrossLineageResultsMapper.java 
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/UrnSearchAcrossLineageResultsMapper.java @@ -16,6 +16,7 @@ import com.linkedin.metadata.search.LineageSearchEntity; import com.linkedin.metadata.search.LineageSearchResult; import com.linkedin.metadata.search.SearchResultMetadata; +import java.util.ArrayList; import java.util.stream.Collectors; import javax.annotation.Nullable; @@ -69,7 +70,8 @@ private SearchAcrossLineageResult mapResult( .map(p -> mapPath(context, p)) .collect(Collectors.toList())) .setDegree(searchEntity.getDegree()) - .setDegrees(searchEntity.getDegrees().stream().collect(Collectors.toList())) + .setDegrees(new ArrayList<>(searchEntity.getDegrees())) + .setExplored(Boolean.TRUE.equals(searchEntity.isExplored())) .build(); } diff --git a/datahub-graphql-core/src/main/resources/search.graphql b/datahub-graphql-core/src/main/resources/search.graphql index 2b29994332d07..13c1ff2e8a764 100644 --- a/datahub-graphql-core/src/main/resources/search.graphql +++ b/datahub-graphql-core/src/main/resources/search.graphql @@ -644,7 +644,7 @@ type ScrollResults { } """ -Results returned by issueing a search across relationships query +Results returned by issuing a search across relationships query """ type SearchAcrossLineageResults { """ @@ -679,7 +679,7 @@ type SearchAcrossLineageResults { } """ -Results returned by issueing a search across relationships query using scroll API +Results returned by issuing a search across relationships query using scroll API """ type ScrollAcrossLineageResults { """ @@ -742,6 +742,11 @@ type SearchAcrossLineageResult { """ degrees: [Int!] + """ + Marks whether or not this entity was explored further for lineage + """ + explored: Boolean! 
+ } """ diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java index 15a16833aeb7b..ea8d8fea54633 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java @@ -345,6 +345,8 @@ private Stream processOneHopLineage( int i) { // Do one hop on the lineage graph + int numHops = i + 1; // Zero indexed for loop counter, one indexed count + int remainingHops = maxHops - numHops; List oneHopRelationships = getLineageRelationshipsInBatches( currentLevel, @@ -352,8 +354,8 @@ private Stream processOneHopLineage( graphFilters, visitedEntities, viaEntities, - i + 1, - maxHops - (i + 1), + numHops, + remainingHops, remainingTime, existingPaths, exploreMultiplePaths, @@ -387,8 +389,9 @@ private Stream processOneHopLineage( || platformMatches( lineageRelationship.getEntity(), ignoreAsHops.get(entityType))))) - .forEach( - lineageRelationship -> additionalCurrentLevel.add(lineageRelationship.getEntity())); + .map(LineageRelationship::getEntity) + .forEach(additionalCurrentLevel::add); + ; if (!additionalCurrentLevel.isEmpty()) { Stream ignoreAsHopUrns = processOneHopLineage( @@ -417,6 +420,14 @@ private Stream processOneHopLineage( .sorted(Comparator.comparing(Urn::toString)) .limit(lineageFlags.getEntitiesExploredPerHopLimit()); } + if (remainingHops > 0) { + // If there are hops remaining, we expect to explore everything getting passed back to the + // loop, barring a timeout + List entitiesToExplore = intermediateStream.collect(Collectors.toList()); + entitiesToExplore.forEach(urn -> result.get(urn).setExplored(true)); + // reassign the stream after consuming it + intermediateStream = entitiesToExplore.stream(); + } } return intermediateStream; } diff --git 
a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java index 3ea117663c23d..bb316f6f2b41c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java @@ -738,6 +738,7 @@ private LineageSearchEntity buildLineageSearchEntity( if (lineageRelationship.hasDegrees()) { entity.setDegrees(lineageRelationship.getDegrees()); } + entity.setExplored(Boolean.TRUE.equals(lineageRelationship.isExplored())); } return entity; } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java index b389f8228a98d..85ca7ce7a1629 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java @@ -410,6 +410,29 @@ public void testTimestampLineage() throws Exception { Assert.assertEquals(Integer.valueOf(2), downstreamResult.getTotal()); } + @Test + public void testExplored() throws Exception { + + List edges = + Arrays.asList( + // One upstream edge + new Edge(dataset2Urn, dataset1Urn, downstreamOf, null, null, null, null, null), + // Two downstream + new Edge(dataset3Urn, dataset2Urn, downstreamOf, null, null, null, null, null), + new Edge(dataset4Urn, dataset2Urn, downstreamOf, null, null, null, null, null), + // One with null values, should always be returned + new Edge(dataset5Urn, dataset2Urn, downstreamOf, null, null, null, null, null)); + + edges.forEach(getGraphService()::addEdge); + syncAfterWrite(); + + EntityLineageResult result = getUpstreamLineage(dataset2Urn, null, null, 10); + Assert.assertTrue(Boolean.TRUE.equals(result.getRelationships().get(0).isExplored())); + + 
EntityLineageResult result2 = getUpstreamLineage(dataset2Urn, null, null, 10, 0); + Assert.assertTrue(result2.getRelationships().get(0).isExplored() == null); + } + /** * Utility method to reduce repeated parameters for lineage tests * diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/graph/LineageRelationship.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/graph/LineageRelationship.pdl index c25a1cee7db47..a169157955e67 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/metadata/graph/LineageRelationship.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/graph/LineageRelationship.pdl @@ -67,4 +67,9 @@ record LineageRelationship { * Replaces the deprecated field "degree". **/ degrees: optional array[int] + + /** + * Marks this relationship as explored during the graph walk + */ + explored: optional boolean } diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/LineageSearchEntity.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/search/LineageSearchEntity.pdl index e99115893712d..fdfc8b2d53291 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/LineageSearchEntity.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/search/LineageSearchEntity.pdl @@ -29,4 +29,9 @@ record LineageSearchEntity includes SearchEntity { * The degrees of separation (number of hops) between the source and this entity */ degrees: array[int] = [] + + /** + * Marks an entity as having been explored for as a part of the graph walk + */ + explored: optional boolean } \ No newline at end of file diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index 011b9e419a0c0..4915f06ffe5d2 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ 
b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -6205,6 +6205,11 @@ }, "doc" : "The degrees of separation (number of hops) between the source and this entity ", "default" : [ ] + }, { + "name" : "explored", + "type" : "boolean", + "doc" : "Marks an entity as having been explored for as a part of the graph walk", + "optional" : true } ] } }, diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.lineage.relationships.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.lineage.relationships.snapshot.json index 056ca0e4da206..00b3c925d0e73 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.lineage.relationships.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.lineage.relationships.snapshot.json @@ -177,6 +177,11 @@ }, "doc" : "The different depths at which this entity is discovered in the lineage graph.\nMarked as optional to maintain backward compatibility, but is filled out by implementations. 
\nReplaces the deprecated field \"degree\".\n", "optional" : true + }, { + "name" : "explored", + "type" : "boolean", + "doc" : "Marks this relationship as explored during the graph walk", + "optional" : true } ] } }, From e0b20e159b3413dab62f301371502fa41630990f Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Tue, 2 Apr 2024 21:34:22 +0530 Subject: [PATCH 12/17] feat(ingest/gc): add index truncation logic (#10099) --- .../datahub/ingestion/source/gc/datahub_gc.py | 135 +++++++++++++++++- 1 file changed, 133 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/gc/datahub_gc.py b/metadata-ingestion/src/datahub/ingestion/source/gc/datahub_gc.py index bf21e293e6a2f..de74470585e5e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/gc/datahub_gc.py +++ b/metadata-ingestion/src/datahub/ingestion/source/gc/datahub_gc.py @@ -1,10 +1,13 @@ +import datetime +import logging +import re import time from dataclasses import dataclass -from typing import Iterable +from typing import Dict, Iterable from pydantic import Field -from datahub.configuration.common import ConfigModel +from datahub.configuration.common import ConfigModel, OperationalError from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SupportStatus, @@ -15,12 +18,30 @@ from datahub.ingestion.api.source import Source, SourceReport from datahub.ingestion.api.workunit import MetadataWorkUnit +logger = logging.getLogger(__name__) + class DataHubGcSourceConfig(ConfigModel): cleanup_expired_tokens: bool = Field( default=True, description="Whether to clean up expired tokens or not", ) + truncate_indices: bool = Field( + default=True, + description="Whether to truncate elasticsearch indices or not which can be safely truncated", + ) + truncate_index_older_than_days: int = Field( + default=30, + description="Indices older than this number of days will be truncated", + ) + truncation_watch_until: int = Field( + default=10000, 
+ description="Wait for truncation of indices until this number of documents are left", + ) + truncation_sleep_between_seconds: int = Field( + default=30, + description="Sleep between truncation monitoring.", + ) @dataclass @@ -51,8 +72,118 @@ def get_workunits_internal( ) -> Iterable[MetadataWorkUnit]: if self.config.cleanup_expired_tokens: self.revoke_expired_tokens() + if self.config.truncate_indices: + self.truncate_indices() yield from [] + def truncate_indices(self) -> None: + self._truncate_timeseries_helper(aspect_name="operation", entity_type="dataset") + self._truncate_timeseries_helper( + aspect_name="datasetusagestatistics", entity_type="dataset" + ) + self._truncate_timeseries_helper( + aspect_name="chartUsageStatistics", entity_type="chart" + ) + self._truncate_timeseries_helper( + aspect_name="dashboardUsageStatistics", entity_type="dashboard" + ) + + def _truncate_timeseries_helper(self, aspect_name: str, entity_type: str) -> None: + self._truncate_timeseries_with_watch_optional( + aspect_name=aspect_name, entity_type=entity_type, watch=False + ) + self._truncate_timeseries_with_watch_optional( + aspect_name=aspect_name, entity_type=entity_type, watch=True + ) + + def _truncate_timeseries_with_watch_optional( + self, aspect_name: str, entity_type: str, watch: bool + ) -> None: + graph = self.graph + assert graph is not None + if watch: + to_delete = 1 + while to_delete > 0: + response = self.truncate_timeseries_util( + aspect=aspect_name, + dry_run=watch, + days_ago=self.config.truncate_index_older_than_days, + entity_type=entity_type, + ) + val = response.get("value", "") + if "This was a dry run" not in val or "out of" not in val: + return + prev_to_delete = to_delete + to_delete, total = re.findall(r"\d+", val)[:2] + to_delete = int(to_delete) + if to_delete <= 0: + logger.info("Nothing to delete.") + return + logger.info(f"to_delete {to_delete} / {total}") + if to_delete == prev_to_delete: + logger.info("Seems to be stuck. 
Ending the loop.") + break + elif to_delete < self.config.truncation_watch_until: + logger.info("Too small truncation. Not going to watch.") + return + else: + time.sleep(self.config.truncation_sleep_between_seconds) + else: + self.truncate_timeseries_util( + aspect=aspect_name, + dry_run=watch, + days_ago=self.config.truncate_index_older_than_days, + entity_type=entity_type, + ) + + def x_days_ago_millis(self, days: int) -> int: + x_days_ago_datetime = datetime.datetime.now( + datetime.timezone.utc + ) - datetime.timedelta(days=days) + return int(x_days_ago_datetime.timestamp() * 1000) + + def truncate_timeseries_util( + self, + aspect: str, + days_ago: int, + dry_run: bool = True, + entity_type: str = "dataset", + ) -> Dict: + graph = self.graph + assert graph is not None + + gms_url = graph._gms_server + if not dry_run: + logger.info( + f"Going to truncate timeseries for {aspect} for {gms_url} older than {days_ago} days" + ) + days_ago_millis = self.x_days_ago_millis(days_ago) + url = f"{gms_url}/operations?action=truncateTimeseriesAspect" + try: + response = graph._post_generic( + url=url, + payload_dict={ + "entityType": entity_type, + "aspect": aspect, + "endTimeMillis": days_ago_millis, + "dryRun": dry_run, + }, + ) + # logger.info(f"Response: {response}") + except OperationalError: + response = graph._post_generic( + url=url, + payload_dict={ + "entityType": entity_type, + "aspect": aspect, + "endTimeMillis": days_ago_millis, + "dryRun": dry_run, + "forceDeleteByQuery": True, + }, + ) + # logger.info(f"Response: {response}") + return response + def revoke_expired_tokens(self) -> None: total = 1 while total > 0: From 4bba834ffd6fb945d1437caec99b5b6707fb2e04 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 2 Apr 2024 12:12:58 -0500 Subject: [PATCH 13/17] fix(entity-service): fix findFirst when already present (#10187) --- .../java/com/linkedin/metadata/entity/EntityServiceImpl.java | 2 +- 
.../java/com/linkedin/metadata/entity/DeleteEntityService.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index 754c5f272e275..ae3a1b63ba0eb 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -930,7 +930,7 @@ public IngestResult ingestProposal( AspectsBatchImpl.builder().mcps(List.of(proposal), auditStamp, this).build(), async) .stream() .findFirst() - .get(); + .orElse(null); } /** diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java index 2cd1aadf7665d..45fb85ffe42b1 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java @@ -349,7 +349,7 @@ private void updateAspect( final IngestResult ingestProposalResult = _entityService.ingestProposal(proposal, auditStamp, false); - if (!ingestProposalResult.isSqlCommitted()) { + if (ingestProposalResult != null && !ingestProposalResult.isSqlCommitted()) { log.error( "Failed to ingest aspect with references removed. 
Before {}, after: {}, please check MCP processor" + " logs for more information", From 2873736eace1dbc6bd8d19206b0d4c4e2f02a535 Mon Sep 17 00:00:00 2001 From: dushayntAW <158567391+dushayntAW@users.noreply.github.com> Date: Tue, 2 Apr 2024 23:05:47 +0530 Subject: [PATCH 14/17] fix(ingestion/salesforce): fixed the issue by escaping the markdown string (#10157) --- .../datahub/ingestion/source/salesforce.py | 7 +- .../account_fields_soql_response.json | 68 +++++++++++++++++++ .../salesforce/salesforce_mces_golden.json | 34 ++++++++++ 3 files changed, 108 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/salesforce.py b/metadata-ingestion/src/datahub/ingestion/source/salesforce.py index 6d52646f85d0a..35af541c9e532 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/salesforce.py +++ b/metadata-ingestion/src/datahub/ingestion/source/salesforce.py @@ -573,10 +573,15 @@ def _get_schema_field( fieldTags: List[str] = self.get_field_tags(fieldName, field) + description = self._get_field_description(field, customField) + + # escaping string starting with `#` + description = "\\" + description if description.startswith("#") else description + schemaField = SchemaFieldClass( fieldPath=fieldPath, type=SchemaFieldDataTypeClass(type=TypeClass()), # type:ignore - description=self._get_field_description(field, customField), + description=description, # nativeDataType is set to data type shown on salesforce user interface, # not the corresponding API data type names. 
nativeDataType=field["FieldDefinition"]["DataType"], diff --git a/metadata-ingestion/tests/integration/salesforce/mock_files/account_fields_soql_response.json b/metadata-ingestion/tests/integration/salesforce/mock_files/account_fields_soql_response.json index 947761b8c79a0..138c574323588 100644 --- a/metadata-ingestion/tests/integration/salesforce/mock_files/account_fields_soql_response.json +++ b/metadata-ingestion/tests/integration/salesforce/mock_files/account_fields_soql_response.json @@ -2508,6 +2508,74 @@ }, "RelationshipName": null, "IsNillable": true + }, + { + "attributes": { + "type": "EntityParticle", + "url": "/services/data/v54.0/tooling/sobjects/EntityParticle/Account.Unique_Account" + }, + "QualifiedApiName": "Unique_Account", + "DeveloperName": "Unique_Account", + "Label": "# Unique_Account", + "FieldDefinition": { + "attributes": { + "type": "FieldDefinition", + "url": "/services/data/v54.0/tooling/sobjects/FieldDefinition/Account.Unique_Account" + }, + "DataType": "Text(80)", + "LastModifiedDate": null, + "LastModifiedBy": null, + "IsIndexed": false, + "ComplianceGroup": null, + "SecurityClassification": null + }, + "DataType": "string", + "Precision": 0, + "Scale": 0, + "Length": 80, + "Digits": 0, + "IsUnique": false, + "IsCompound": false, + "IsComponent": false, + "ReferenceTo": { + "referenceTo": null + }, + "RelationshipName": null, + "IsNillable": true + }, + { + "attributes": { + "type": "EntityParticle", + "url": "/services/data/v54.0/tooling/sobjects/EntityParticle/Account.Unique_Number" + }, + "QualifiedApiName": "Unique_Number", + "DeveloperName": "Unique_Account", + "Label": "#Unique_Number", + "FieldDefinition": { + "attributes": { + "type": "FieldDefinition", + "url": "/services/data/v54.0/tooling/sobjects/FieldDefinition/Account.Unique_Number" + }, + "DataType": "Text(80)", + "LastModifiedDate": null, + "LastModifiedBy": null, + "IsIndexed": false, + "ComplianceGroup": null, + "SecurityClassification": null + }, + "DataType": 
"string", + "Precision": 0, + "Scale": 0, + "Length": 80, + "Digits": 0, + "IsUnique": false, + "IsCompound": false, + "IsComponent": false, + "ReferenceTo": { + "referenceTo": null + }, + "RelationshipName": null, + "IsNillable": true } ] } \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json b/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json index 4e54f199eafed..90df0997495f0 100644 --- a/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json +++ b/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json @@ -1350,6 +1350,40 @@ }, "isPartOfKey": false, "jsonProps": "{}" + }, + { + "fieldPath": "Unique_Account", + "nullable": true, + "description": "\\# Unique_Account", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "Text(80)", + "recursive": false, + "globalTags": { + "tags": [] + }, + "isPartOfKey": false, + "jsonProps": "{}" + }, + { + "fieldPath": "Unique_Number", + "nullable": true, + "description": "\\#Unique_Number", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "Text(80)", + "recursive": false, + "globalTags": { + "tags": [] + }, + "isPartOfKey": false, + "jsonProps": "{}" } ], "primaryKeys": [ From 5c06f7a245356759470391f770ffb62b4b738042 Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Tue, 2 Apr 2024 22:13:05 +0200 Subject: [PATCH 15/17] fix(ingest/bigquery): Supporting lineage extraction in case the select query result's target table is set on job (#10191) Co-authored-by: Harshal Sheth --- .../datahub/ingestion/source/bigquery_v2/lineage.py | 11 ++++++++++- .../tests/unit/test_bigquery_lineage.py | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py index 
764f4f2449924..0d205679a8bf3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py @@ -752,8 +752,17 @@ def _create_lineage_map( # Try the sql parser first. if self.config.lineage_use_sql_parser: + if e.statementType == "SELECT": + # We wrap select statements in a CTE to make them parseable as insert statement. + # This is a workaround for the sql parser to support the case where the user runs a query and inserts the result into a table.. + query = f"""create table `{destination_table.table_identifier.get_table_name()}` AS + ( + {e.query} + )""" + else: + query = e.query raw_lineage = sqlglot_lineage( - e.query, + query, schema_resolver=sql_parser_schema_resolver, default_db=e.project_id, ) diff --git a/metadata-ingestion/tests/unit/test_bigquery_lineage.py b/metadata-ingestion/tests/unit/test_bigquery_lineage.py index 1edac3fde0a6c..5d8c040b4123b 100644 --- a/metadata-ingestion/tests/unit/test_bigquery_lineage.py +++ b/metadata-ingestion/tests/unit/test_bigquery_lineage.py @@ -28,7 +28,7 @@ def lineage_entries() -> List[QueryEvent]: SELECT first.a, second.b FROM `my_project.my_dataset.my_source_table1` first LEFT JOIN `my_project.my_dataset.my_source_table2` second ON first.id = second.id """, - statementType="SELECT", + statementType="INSERT", project_id="proj_12344", end_time=None, referencedTables=[ From 57de905c66b6992aefb2051708fa83898fa82cec Mon Sep 17 00:00:00 2001 From: trialiya <41265764+trialiya@users.noreply.github.com> Date: Tue, 2 Apr 2024 23:33:58 +0300 Subject: [PATCH 16/17] fix(retention): fix time-based retention (#10118) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Хазиев Ленар --- .../metadata/entity/cassandra/CassandraRetentionService.java | 2 +- .../linkedin/metadata/entity/ebean/EbeanRetentionService.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java index 91e3197529877..bc6ee6ddd5026 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java @@ -195,7 +195,7 @@ private void applyTimeBasedRetention( @Nonnull final Urn urn, @Nonnull final String aspectName, @Nonnull final TimeBasedRetention retention) { - Timestamp threshold = new Timestamp(_clock.millis() - retention.getMaxAgeInSeconds() * 1000); + Timestamp threshold = new Timestamp(_clock.millis() - retention.getMaxAgeInSeconds() * 1000L); SimpleStatement ss = deleteFrom(CassandraAspect.TABLE_NAME) .whereColumn(CassandraAspect.URN_COLUMN) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java index 250a81d9c8edc..77752153aad47 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java @@ -156,7 +156,7 @@ private Expression getTimeBasedRetentionQuery(@Nonnull final TimeBasedRetention return new SimpleExpression( EbeanAspectV2.CREATED_ON_COLUMN, Op.LT, - new Timestamp(_clock.millis() - retention.getMaxAgeInSeconds() * 1000)); + new Timestamp(_clock.millis() - retention.getMaxAgeInSeconds() * 1000L)); } private void applyRetention( From 888a1de9fc85169ead4eb9ba207ce85b56abcbd4 Mon Sep 17 00:00:00 2001 From: RyanHolstien Date: Tue, 2 Apr 2024 19:36:52 -0500 Subject: [PATCH 17/17] feat(lineage): give via and paths in entity lineage response (#10192) --- .../resolvers/load/EntityLineageResultResolver.java | 7 +++++++ 
.../datahub/graphql/types/mappers/MapperUtils.java | 9 +++++++++ .../mappers/UrnSearchAcrossLineageResultsMapper.java | 10 +--------- datahub-graphql-core/src/main/resources/entity.graphql | 4 ++++ datahub-graphql-core/src/main/resources/search.graphql | 5 +++++ .../metadata/graph/elastic/ESGraphQueryDAO.java | 8 +++++--- .../linkedin/metadata/search/LineageSearchService.java | 1 + .../linkedin/metadata/graph/LineageRelationship.pdl | 5 +++++ .../linkedin/metadata/search/LineageSearchEntity.pdl | 5 +++++ .../com.linkedin.entity.entities.snapshot.json | 5 +++++ .../com.linkedin.lineage.relationships.snapshot.json | 5 +++++ 11 files changed, 52 insertions(+), 12 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityLineageResultResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityLineageResultResolver.java index 8de18ec01e6dc..e28ec3dbb870f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityLineageResultResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityLineageResultResolver.java @@ -1,8 +1,10 @@ package com.linkedin.datahub.graphql.resolvers.load; import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; +import static com.linkedin.datahub.graphql.types.mappers.MapperUtils.*; import com.datahub.authorization.AuthorizationConfiguration; +import com.linkedin.common.UrnArrayArray; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.SetMode; @@ -156,6 +158,11 @@ private LineageRelationship mapEntityRelationship( result.setUpdatedActor(UrnToEntityMapper.map(context, updatedActor)); } result.setIsManual(lineageRelationship.hasIsManual() && lineageRelationship.isIsManual()); + if (lineageRelationship.getPaths() != null) { + UrnArrayArray paths = lineageRelationship.getPaths(); + result.setPaths( + 
paths.stream().map(path -> mapPath(context, path)).collect(Collectors.toList())); + } return result; } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/MapperUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/MapperUtils.java index 3cae0155a86db..6bda333256a4c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/MapperUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/MapperUtils.java @@ -3,9 +3,11 @@ import static com.linkedin.datahub.graphql.util.SearchInsightsUtil.*; import static com.linkedin.metadata.utils.SearchUtil.*; +import com.linkedin.common.UrnArray; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.generated.AggregationMetadata; +import com.linkedin.datahub.graphql.generated.EntityPath; import com.linkedin.datahub.graphql.generated.FacetMetadata; import com.linkedin.datahub.graphql.generated.MatchedField; import com.linkedin.datahub.graphql.generated.SearchResult; @@ -104,4 +106,11 @@ public static SearchSuggestion mapSearchSuggestion( return new SearchSuggestion( suggestion.getText(), suggestion.getScore(), Math.toIntExact(suggestion.getFrequency())); } + + public static EntityPath mapPath(@Nullable final QueryContext context, UrnArray path) { + EntityPath entityPath = new EntityPath(); + entityPath.setPath( + path.stream().map(p -> UrnToEntityMapper.map(context, p)).collect(Collectors.toList())); + return entityPath; + } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/UrnSearchAcrossLineageResultsMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/UrnSearchAcrossLineageResultsMapper.java index b85303909c080..ca363deb90c4d 100644 --- 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/UrnSearchAcrossLineageResultsMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/UrnSearchAcrossLineageResultsMapper.java @@ -3,11 +3,9 @@ import static com.linkedin.datahub.graphql.types.mappers.MapperUtils.*; import static com.linkedin.datahub.graphql.util.SearchInsightsUtil.*; -import com.linkedin.common.UrnArray; import com.linkedin.data.template.RecordTemplate; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.generated.Entity; -import com.linkedin.datahub.graphql.generated.EntityPath; import com.linkedin.datahub.graphql.generated.FreshnessStats; import com.linkedin.datahub.graphql.generated.SearchAcrossLineageResult; import com.linkedin.datahub.graphql.generated.SearchAcrossLineageResults; @@ -72,13 +70,7 @@ private SearchAcrossLineageResult mapResult( .setDegree(searchEntity.getDegree()) .setDegrees(new ArrayList<>(searchEntity.getDegrees())) .setExplored(Boolean.TRUE.equals(searchEntity.isExplored())) + .setIgnoredAsHop(Boolean.TRUE.equals(searchEntity.isIgnoredAsHop())) .build(); } - - private EntityPath mapPath(@Nullable final QueryContext context, UrnArray path) { - EntityPath entityPath = new EntityPath(); - entityPath.setPath( - path.stream().map(p -> UrnToEntityMapper.map(context, p)).collect(Collectors.toList())); - return entityPath; - } } diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index b750d20626101..106148c425791 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -1331,6 +1331,10 @@ type LineageRelationship { """ isManual: Boolean + """ + The paths traversed for this relationship + """ + paths: [EntityPath] } """ diff --git a/datahub-graphql-core/src/main/resources/search.graphql 
b/datahub-graphql-core/src/main/resources/search.graphql index 13c1ff2e8a764..499ac3a0860d4 100644 --- a/datahub-graphql-core/src/main/resources/search.graphql +++ b/datahub-graphql-core/src/main/resources/search.graphql @@ -747,6 +747,11 @@ type SearchAcrossLineageResult { """ explored: Boolean! + """ + Whether this relationship was ignored as a hop + """ + ignoredAsHop: Boolean! + } """ diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java index ea8d8fea54633..bdcbf020ecf78 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java @@ -389,9 +389,11 @@ private Stream processOneHopLineage( || platformMatches( lineageRelationship.getEntity(), ignoreAsHops.get(entityType))))) - .map(LineageRelationship::getEntity) - .forEach(additionalCurrentLevel::add); - ; + .forEach( + lineageRelationship -> { + additionalCurrentLevel.add(lineageRelationship.getEntity()); + lineageRelationship.setIgnoredAsHop(true); + }); if (!additionalCurrentLevel.isEmpty()) { Stream ignoreAsHopUrns = processOneHopLineage( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java index bb316f6f2b41c..94f56fec2acc9 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java @@ -739,6 +739,7 @@ private LineageSearchEntity buildLineageSearchEntity( entity.setDegrees(lineageRelationship.getDegrees()); } entity.setExplored(Boolean.TRUE.equals(lineageRelationship.isExplored())); + entity.setIgnoredAsHop(Boolean.TRUE.equals(lineageRelationship.isIgnoredAsHop())); } return entity; } diff --git 
a/metadata-models/src/main/pegasus/com/linkedin/metadata/graph/LineageRelationship.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/graph/LineageRelationship.pdl index a169157955e67..552dd7323b551 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/metadata/graph/LineageRelationship.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/graph/LineageRelationship.pdl @@ -72,4 +72,9 @@ record LineageRelationship { * Marks this relationship as explored during the graph walk */ explored: optional boolean + + /** + * Whether this relationship was ignored as a hop while performing the graph walk + */ + ignoredAsHop: optional boolean } diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/LineageSearchEntity.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/search/LineageSearchEntity.pdl index fdfc8b2d53291..3fd8a48c6bf5e 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/LineageSearchEntity.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/search/LineageSearchEntity.pdl @@ -34,4 +34,9 @@ record LineageSearchEntity includes SearchEntity { * Marks an entity as having been explored for as a part of the graph walk */ explored: optional boolean + + /** + * Whether this relationship was ignored as a hop while performing the graph walk + */ + ignoredAsHop: optional boolean } \ No newline at end of file diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index 4915f06ffe5d2..43845a5fbbf6a 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -6210,6 +6210,11 @@ "type" : "boolean", "doc" : "Marks an entity as having been explored for as a part of the graph walk", "optional" : true + }, { + "name" : 
"ignoredAsHop", + "type" : "boolean", + "doc" : "Whether this relationship was ignored as a hop while performing the graph walk", + "optional" : true } ] } }, diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.lineage.relationships.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.lineage.relationships.snapshot.json index 00b3c925d0e73..3886faffadedb 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.lineage.relationships.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.lineage.relationships.snapshot.json @@ -182,6 +182,11 @@ "type" : "boolean", "doc" : "Marks this relationship as explored during the graph walk", "optional" : true + }, { + "name" : "ignoredAsHop", + "type" : "boolean", + "doc" : "Whether this relationship was ignored as a hop while performing the graph walk", + "optional" : true } ] } },