diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index 5b265b6714452..d1da55268a50d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -63,6 +63,7 @@ import com.linkedin.datahub.graphql.generated.Domain; import com.linkedin.datahub.graphql.generated.ERModelRelationship; import com.linkedin.datahub.graphql.generated.ERModelRelationshipProperties; +import com.linkedin.datahub.graphql.generated.Entity; import com.linkedin.datahub.graphql.generated.EntityPath; import com.linkedin.datahub.graphql.generated.EntityRelationship; import com.linkedin.datahub.graphql.generated.EntityRelationshipLegacy; @@ -312,6 +313,7 @@ import com.linkedin.datahub.graphql.resolvers.type.HyperParameterValueTypeResolver; import com.linkedin.datahub.graphql.resolvers.type.PlatformSchemaUnionTypeResolver; import com.linkedin.datahub.graphql.resolvers.type.PropertyValueResolver; +import com.linkedin.datahub.graphql.resolvers.type.ResolvedActorResolver; import com.linkedin.datahub.graphql.resolvers.type.ResultsTypeResolver; import com.linkedin.datahub.graphql.resolvers.type.TimeSeriesAspectInterfaceTypeResolver; import com.linkedin.datahub.graphql.resolvers.user.CreateNativeUserResetTokenResolver; @@ -1730,12 +1732,22 @@ private void configureDatasetResolvers(final RuntimeWiring.Builder builder) { .type( "InstitutionalMemoryMetadata", typeWiring -> - typeWiring.dataFetcher( - "author", - new LoadableTypeResolver<>( - corpUserType, - (env) -> - ((InstitutionalMemoryMetadata) env.getSource()).getAuthor().getUrn()))) + typeWiring + .dataFetcher( + "author", + new LoadableTypeResolver<>( + corpUserType, + (env) -> + ((InstitutionalMemoryMetadata) env.getSource()) + .getAuthor() + .getUrn())) + .dataFetcher( + "actor", + new 
EntityTypeResolver( + this.entityTypes, + (env) -> + (Entity) + ((InstitutionalMemoryMetadata) env.getSource()).getActor()))) .type( "DatasetStatsSummary", typeWiring -> @@ -2242,6 +2254,7 @@ private void configureTypeResolvers(final RuntimeWiring.Builder builder) { "HyperParameterValueType", typeWiring -> typeWiring.typeResolver(new HyperParameterValueTypeResolver())) .type("PropertyValue", typeWiring -> typeWiring.typeResolver(new PropertyValueResolver())) + .type("ResolvedActor", typeWiring -> typeWiring.typeResolver(new ResolvedActorResolver())) .type("Aspect", typeWiring -> typeWiring.typeResolver(new AspectInterfaceTypeResolver())) .type( "TimeSeriesAspect", diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetChartsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetChartsResolver.java index 0fe6e5de0cac6..197ac87c1e22d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetChartsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetChartsResolver.java @@ -84,8 +84,21 @@ private TimeSeriesChart getActiveUsersTimeSeriesChart( final DateTime end, final String title, final DateInterval interval) { - final DateRange dateRange = - new DateRange(String.valueOf(beginning.getMillis()), String.valueOf(end.getMillis())); + + final DateRange dateRange; + + // adjust month to show 1st of month rather than last day of previous month + if (interval == DateInterval.MONTH) { + dateRange = + new DateRange( + String.valueOf(beginning.plusDays(1).getMillis()), // Shift start by 1 day + String.valueOf(end.plusDays(1).getMillis()) // Shift end by 1 day + ); + } else { + // week display starting Sundays + dateRange = + new DateRange(String.valueOf(beginning.getMillis()), String.valueOf(end.getMillis())); + } final List timeSeriesLines = _analyticsService.getTimeseriesChart( @@ -96,6 +109,7 
@@ private TimeSeriesChart getActiveUsersTimeSeriesChart( ImmutableMap.of(), Collections.emptyMap(), Optional.of("browserId")); + return TimeSeriesChart.builder() .setTitle(title) .setDateRange(dateRange) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/type/ResolvedActorResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/type/ResolvedActorResolver.java new file mode 100644 index 0000000000000..7ae719a23b00a --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/type/ResolvedActorResolver.java @@ -0,0 +1,25 @@ +package com.linkedin.datahub.graphql.resolvers.type; + +import com.linkedin.datahub.graphql.generated.CorpGroup; +import com.linkedin.datahub.graphql.generated.CorpUser; +import graphql.TypeResolutionEnvironment; +import graphql.schema.GraphQLObjectType; +import graphql.schema.TypeResolver; + +public class ResolvedActorResolver implements TypeResolver { + + public static final String CORP_USER = "CorpUser"; + public static final String CORP_GROUP = "CorpGroup"; + + @Override + public GraphQLObjectType getType(TypeResolutionEnvironment env) { + if (env.getObject() instanceof CorpUser) { + return env.getSchema().getObjectType(CORP_USER); + } else if (env.getObject() instanceof CorpGroup) { + return env.getSchema().getObjectType(CORP_GROUP); + } else { + throw new RuntimeException( + "Unrecognized object type provided to type resolver, Type:" + env.getObject().toString()); + } + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/InstitutionalMemoryMetadataMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/InstitutionalMemoryMetadataMapper.java index 7c6de02ecc876..9781643c414c8 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/InstitutionalMemoryMetadataMapper.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/InstitutionalMemoryMetadataMapper.java @@ -28,6 +28,7 @@ public InstitutionalMemoryMetadata apply( result.setDescription(input.getDescription()); // deprecated field result.setLabel(input.getDescription()); result.setAuthor(getAuthor(input.getCreateStamp().getActor().toString())); + result.setActor(ResolvedActorMapper.map(input.getCreateStamp().getActor())); result.setCreated(AuditStampMapper.map(context, input.getCreateStamp())); result.setAssociatedUrn(entityUrn.toString()); return result; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/ResolvedActorMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/ResolvedActorMapper.java new file mode 100644 index 0000000000000..c00ffd0b828b1 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/ResolvedActorMapper.java @@ -0,0 +1,31 @@ +package com.linkedin.datahub.graphql.types.common.mappers; + +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.generated.CorpGroup; +import com.linkedin.datahub.graphql.generated.CorpUser; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.generated.ResolvedActor; +import com.linkedin.metadata.Constants; +import javax.annotation.Nonnull; + +public class ResolvedActorMapper { + + public static final ResolvedActorMapper INSTANCE = new ResolvedActorMapper(); + + public static ResolvedActor map(@Nonnull final Urn actorUrn) { + return INSTANCE.apply(actorUrn); + } + + public ResolvedActor apply(@Nonnull final Urn actorUrn) { + if (actorUrn.getEntityType().equals(Constants.CORP_GROUP_ENTITY_NAME)) { + CorpGroup partialGroup = new CorpGroup(); + partialGroup.setUrn(actorUrn.toString()); + partialGroup.setType(EntityType.CORP_GROUP); + return partialGroup; + } + CorpUser partialUser = new CorpUser(); + 
partialUser.setUrn(actorUrn.toString()); + partialUser.setType(EntityType.CORP_USER); + return (ResolvedActor) partialUser; + } +} diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 732a782139b61..049527e5d77e3 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -3005,8 +3005,14 @@ type InstitutionalMemoryMetadata { """ The author of this metadata + Deprecated! Use actor instead for users or groups. """ - author: CorpUser! + author: CorpUser! @deprecated(reason: "Use `actor`") + + """ + The author of this metadata + """ + actor: ResolvedActor! """ An AuditStamp corresponding to the creation of this resource @@ -3834,6 +3840,8 @@ enum CorpUserStatus { ACTIVE } +union ResolvedActor = CorpUser | CorpGroup + """ A DataHub User entity, which represents a Person on the Metadata Entity Graph """ diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolverTest.java index 1b33118bd154a..0a8e4e8b4fa5f 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolverTest.java @@ -386,7 +386,11 @@ private static EntityClient initMockEntityClient( Mockito.when( client.searchAcrossEntities( any(), - Mockito.eq(entityTypes), + Mockito.argThat( + argument -> + argument != null + && argument.containsAll(entityTypes) + && entityTypes.containsAll(argument)), Mockito.eq(query), Mockito.eq(filter), Mockito.eq(start), @@ -409,7 +413,11 @@ private static void verifyMockEntityClient( Mockito.verify(mockClient, Mockito.times(1)) .searchAcrossEntities( any(), - 
Mockito.eq(entityTypes), + Mockito.argThat( + argument -> + argument != null + && argument.containsAll(entityTypes) + && entityTypes.containsAll(argument)), Mockito.eq(query), Mockito.eq(filter), Mockito.eq(start), diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolverTest.java index a601a815453b2..42768b8a2de21 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolverTest.java @@ -462,7 +462,11 @@ private static EntityClient initMockEntityClient( Mockito.when( client.searchAcrossEntities( any(), - Mockito.eq(entityTypes), + Mockito.argThat( + argument -> + argument != null + && argument.containsAll(entityTypes) + && entityTypes.containsAll(argument)), Mockito.eq(query), Mockito.eq(filter), Mockito.eq(start), @@ -483,7 +487,11 @@ private static void verifyMockEntityClient( Mockito.verify(mockClient, Mockito.times(1)) .searchAcrossEntities( any(), - Mockito.eq(entityTypes), + Mockito.argThat( + argument -> + argument != null + && argument.containsAll(entityTypes) + && entityTypes.containsAll(argument)), Mockito.eq(query), Mockito.eq(filter), Mockito.eq(start), diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/utils/DateUtilTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/utils/DateUtilTest.java index 6ecbc8d015b29..4383df9d46a4b 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/utils/DateUtilTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/utils/DateUtilTest.java @@ -47,4 +47,26 @@ public void testStartOfNextWeek() { Mockito.when(dateUtil.getNow()).thenReturn(setTimeParts(8, false)); 
assertEqualStartOfNextWeek(dateUtil, 9); } + + // validates logic to display correct dates in MAU chart + @Test + public void testDateAdjustmentsForMonth() { + DateUtil dateUtil = Mockito.spy(DateUtil.class); + + Mockito.when(dateUtil.getNow()).thenReturn(new DateTime(2024, 11, 15, 0, 0, 0)); + + // start date should be next month minus a day + // but we want to display Dec 1 instead of Nov 30, so add a day and verify it's Dec + DateTime startOfNextMonthMinus12 = dateUtil.getStartOfNextMonth().minusMonths(12); + DateTime adjustedStart = startOfNextMonthMinus12.minusMillis(1).plusDays(1); + assertEquals(12, adjustedStart.getMonthOfYear()); // Verify it is December + assertEquals(2023, adjustedStart.getYear()); // Verify it is 2023 + + // verify that the end date displays correctly + // the chart will display Oct 1 as the last month because we don't show current month + DateTime startOfThisMonth = dateUtil.getStartOfThisMonth(); + DateTime adjustedEnd = startOfThisMonth.minusMillis(1).plusDays(1); + assertEquals(11, adjustedEnd.getMonthOfYear()); // Verify it is November + assertEquals(2024, adjustedEnd.getYear()); // Verify it is 2024 + } } diff --git a/datahub-web-react/src/Mocks.tsx b/datahub-web-react/src/Mocks.tsx index aed672a34e7ca..329d6250e576a 100644 --- a/datahub-web-react/src/Mocks.tsx +++ b/datahub-web-react/src/Mocks.tsx @@ -566,6 +566,12 @@ export const dataset3 = { username: 'datahub', type: EntityType.CorpUser, }, + actor: { + __typename: 'CorpUser', + urn: 'urn:li:corpuser:datahub', + username: 'datahub', + type: EntityType.CorpUser, + }, description: 'This only points to Google', label: 'This only points to Google', created: { diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/LinkButton.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/LinkButton.tsx index 0ce3c9641d559..c3896baedace7 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/LinkButton.tsx +++ 
b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/LinkButton.tsx @@ -29,7 +29,7 @@ export default function LinkButton({ link }: Props) { href={link.url} target="_blank" rel="noreferrer" - key={`${link.label}-${link.url}-${link.author}`} + key={`${link.label}-${link.url}-${link.actor.urn}`} > {link.description || link.label} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/LinkList.tsx b/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/LinkList.tsx index 7212198bbf61c..6eb680785599e 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/LinkList.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/LinkList.tsx @@ -3,7 +3,7 @@ import { Link } from 'react-router-dom'; import styled from 'styled-components/macro'; import { message, Button, List, Typography, Modal, Form, Input } from 'antd'; import { LinkOutlined, DeleteOutlined, EditOutlined } from '@ant-design/icons'; -import { EntityType, InstitutionalMemoryMetadata } from '../../../../../../types.generated'; +import { InstitutionalMemoryMetadata } from '../../../../../../types.generated'; import { useEntityData, useMutationUrn } from '../../../EntityContext'; import { useEntityRegistry } from '../../../../../useEntityRegistry'; import { ANTD_GRAY } from '../../../constants'; @@ -182,10 +182,8 @@ export const LinkList = ({ refetch }: LinkListProps) => { description={ <> Added {formatDateString(link.created.time)} by{' '} - - {link.author.username} + + {entityRegistry.getDisplayName(link.actor.type, link.actor)} } diff --git a/datahub-web-react/src/app/ingest/source/builder/sources.json b/datahub-web-react/src/app/ingest/source/builder/sources.json index ce62bc79b77d8..a5eec5f3af843 100644 --- a/datahub-web-react/src/app/ingest/source/builder/sources.json +++ b/datahub-web-react/src/app/ingest/source/builder/sources.json @@ -284,7 +284,7 @@ "name": "csv-enricher", "displayName": "CSV", 
"description": "Import metadata from a formatted CSV.", - "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/csv'", + "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/csv-enricher", "recipe": "source: \n type: csv-enricher \n config: \n # URL of your csv file to ingest \n filename: \n array_delimiter: '|' \n delimiter: ',' \n write_semantics: PATCH" }, { diff --git a/datahub-web-react/src/app/ingest/source/conf/csv/csv.ts b/datahub-web-react/src/app/ingest/source/conf/csv/csv.ts index e1dc22c086fb4..e4cdee717923c 100644 --- a/datahub-web-react/src/app/ingest/source/conf/csv/csv.ts +++ b/datahub-web-react/src/app/ingest/source/conf/csv/csv.ts @@ -15,7 +15,7 @@ const csvConfig: SourceConfig = { type: 'csv-enricher', placeholderRecipe, displayName: 'CSV', - docsUrl: 'https://datahubproject.io/docs/generated/ingestion/sources/csv', + docsUrl: 'https://datahubproject.io/docs/generated/ingestion/sources/csv-enricher', logoUrl: csvLogo, }; diff --git a/datahub-web-react/src/graphql-mock/mutationHelper.ts b/datahub-web-react/src/graphql-mock/mutationHelper.ts index a97b41b53bc65..0cf4f5f87f29c 100644 --- a/datahub-web-react/src/graphql-mock/mutationHelper.ts +++ b/datahub-web-react/src/graphql-mock/mutationHelper.ts @@ -99,6 +99,7 @@ export const updateEntityLink = ({ entity, institutionalMemory }: UpdateEntityLi description: e.description as string, label: e.description as string, author: { urn: e.author, username: '', type: EntityType.CorpUser }, + actor: { urn: e.author, username: '', type: EntityType.CorpUser }, created: { time: Date.now(), actor: getActor(), __typename: 'AuditStamp' }, associatedUrn: dataEntity.urn, }; diff --git a/datahub-web-react/src/graphql/domain.graphql b/datahub-web-react/src/graphql/domain.graphql index 3897a2ced85b8..2e96a78b0f44b 100644 --- a/datahub-web-react/src/graphql/domain.graphql +++ b/datahub-web-react/src/graphql/domain.graphql @@ -19,9 +19,8 @@ query getDomain($urn: String!) 
{ institutionalMemory { elements { url - author { - urn - username + actor { + ...resolvedActorFields } description created { diff --git a/datahub-web-react/src/graphql/fragments.graphql b/datahub-web-react/src/graphql/fragments.graphql index 7ce4082c42f61..67dbdbbb22f30 100644 --- a/datahub-web-react/src/graphql/fragments.graphql +++ b/datahub-web-react/src/graphql/fragments.graphql @@ -202,12 +202,22 @@ fragment embedFields on Embed { renderUrl } +fragment resolvedActorFields on ResolvedActor { + ... on CorpUser { + urn + ...entityDisplayNameFields + } + ... on CorpGroup { + urn + ...entityDisplayNameFields + } +} + fragment institutionalMemoryFields on InstitutionalMemory { elements { url - author { - urn - username + actor { + ...resolvedActorFields } description created { diff --git a/docker/profiles/docker-compose.gms.yml b/docker/profiles/docker-compose.gms.yml index 824c8024b05d6..e8b2d4cd1f29d 100644 --- a/docker/profiles/docker-compose.gms.yml +++ b/docker/profiles/docker-compose.gms.yml @@ -101,6 +101,7 @@ x-datahub-gms-service: &datahub-gms-service <<: [*primary-datastore-mysql-env, *graph-datastore-search-env, *search-datastore-env, *datahub-quickstart-telemetry-env, *kafka-env] ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE: ${ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE:-search_config.yaml} ALTERNATE_MCP_VALIDATION: ${ALTERNATE_MCP_VALIDATION:-true} + STRICT_URN_VALIDATION_ENABLED: ${STRICT_URN_VALIDATION_ENABLED:-true} healthcheck: test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health start_period: 90s @@ -183,6 +184,7 @@ x-datahub-mce-consumer-service: &datahub-mce-consumer-service environment: &datahub-mce-consumer-env <<: [*primary-datastore-mysql-env, *graph-datastore-search-env, *search-datastore-env, *datahub-quickstart-telemetry-env, *kafka-env] ALTERNATE_MCP_VALIDATION: ${ALTERNATE_MCP_VALIDATION:-true} + STRICT_URN_VALIDATION_ENABLED: ${STRICT_URN_VALIDATION_ENABLED:-true} x-datahub-mce-consumer-service-dev: 
&datahub-mce-consumer-service-dev <<: *datahub-mce-consumer-service diff --git a/docs/deploy/environment-vars.md b/docs/deploy/environment-vars.md index 6429996c088b4..13d7410397533 100644 --- a/docs/deploy/environment-vars.md +++ b/docs/deploy/environment-vars.md @@ -9,12 +9,13 @@ DataHub works. ## Feature Flags -| Variable | Default | Unit/Type | Components | Description | -|--------------------------------------------------|---------|-----------|-----------------------------------------|-----------------------------------------------------------------------------------------------------------------------------| -| `UI_INGESTION_ENABLED` | `true` | boolean | [`GMS`, `MCE Consumer`] | Enable UI based ingestion. | -| `DATAHUB_ANALYTICS_ENABLED` | `true` | boolean | [`Frontend`, `GMS`] | Collect DataHub usage to populate the analytics dashboard. | -| `BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE` | `true` | boolean | [`GMS`, `MCE Consumer`, `MAE Consumer`] | Do not wait for the `system-update` to complete before starting. This should typically only be disabled during development. | -| `ER_MODEL_RELATIONSHIP_FEATURE_ENABLED` | `false` | boolean | [`Frontend`, `GMS`] | Enable ER Model Relation Feature that shows Relationships Tab within a Dataset UI. | +| Variable | Default | Unit/Type | Components | Description | +|--------------------------------------------------|----------|-----------|------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------| +| `UI_INGESTION_ENABLED` | `true` | boolean | [`GMS`, `MCE Consumer`] | Enable UI based ingestion. | +| `DATAHUB_ANALYTICS_ENABLED` | `true` | boolean | [`Frontend`, `GMS`] | Collect DataHub usage to populate the analytics dashboard. | +| `BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE` | `true` | boolean | [`GMS`, `MCE Consumer`, `MAE Consumer`] | Do not wait for the `system-update` to complete before starting. 
This should typically only be disabled during development. | +| `ER_MODEL_RELATIONSHIP_FEATURE_ENABLED` | `false` | boolean | [`Frontend`, `GMS`] | Enable ER Model Relation Feature that shows Relationships Tab within a Dataset UI. | +| `STRICT_URN_VALIDATION_ENABLED` | `false` | boolean | [`GMS`, `MCE Consumer`, `MAE Consumer`] | Enable stricter URN validation logic | ## Ingestion diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index c21d197de29f0..087e30c2e541a 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -38,7 +38,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ### Breaking Changes -- #11486 - Deprecated Criterion filters using `value`. Use `values` instead. This also deprecates the ability to use comma delimited string to represent multiple values using `value`. +- #11486 - Criterion's `value` parameter has been previously deprecated. Use of `value` instead of `values` is no longer supported and will be completely removed on the next major version. - #11484 - Metadata service authentication enabled by default - #11484 - Rest API authorization enabled by default - #10472 - `SANDBOX` added as a FabricType. No rollbacks allowed once metadata with this fabric type is added without manual cleanups in databases. @@ -88,6 +88,9 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ### Other Notable Changes - Downgrade to previous version is not automatically supported. +- Data Product Properties Unset side effect introduced + - Previously, Data Products could be set as linked to multiple Datasets if modified directly via the REST API rather than linked through the UI or GraphQL. 
This side effect aligns the REST API behavior with the GraphQL behavior by introducing a side effect that enforces the 1-to-1 constraint between Data Products and Datasets + - NOTE: There is a pathological pattern of writes for Data Products that can introduce issues with write processing that can occur with this side effect. If you are constantly changing all of the Datasets associated with a Data Product back and forth between multiple Data Products it will result in a high volume of writes due to the need to unset previous associations. ## 0.14.0.2 diff --git a/docs/managed-datahub/release-notes/v_0_3_7.md b/docs/managed-datahub/release-notes/v_0_3_7.md index 19cb04e9f5603..af23b5ae1541b 100644 --- a/docs/managed-datahub/release-notes/v_0_3_7.md +++ b/docs/managed-datahub/release-notes/v_0_3_7.md @@ -32,7 +32,7 @@ If you are using an older CLI/SDK version, then please upgrade it. This applies datahub: timezone: 'America/Los_Angeles' ``` - - #11486 - Deprecated Criterion filters using `value`. Use `values` instead. This also deprecates the ability to use comma delimited string to represent multiple values using `value`. + - #11486 - Criterion's `value` parameter has been previously deprecated. Use of `value` instead of `values` is no longer supported and will be completely removed on the next major version. - #10472 - `SANDBOX` added as a FabricType. No rollbacks allowed once metadata with this fabric type is added without manual cleanups in databases. - #11619 - schema field/column paths can no longer be empty strings - #11619 - schema field/column paths can no longer be duplicated within the schema @@ -120,3 +120,6 @@ If you are using an older CLI/SDK version, then please upgrade it. 
This applies - (system / internal) Exclude form-prompt tests in live Metadata Tests evaluation - (system / internal) Exclude form-prompt tests in stored Metadata Test results - Elasticsearch reindex time limit of 8h removed + - Data Product Properties Unset side effect introduced + - Previously, Data Products could be set as linked to multiple Datasets if modified directly via the REST API rather than linked through the UI or GraphQL. This side effect aligns the REST API behavior with the GraphQL behavior by introducing a side effect that enforces the 1-to-1 constraint between Data Products and Datasets + - NOTE: There is a pathological pattern of writes for Data Products that can introduce issues with write processing that can occur with this side effect. If you are constantly changing all of the Datasets associated with a Data Product back and forth between multiple Data Products it will result in a high volume of writes due to the need to unset previous associations. diff --git a/docs/what/urn.md b/docs/what/urn.md index e35ca7fbaca4b..2f4dffb985653 100644 --- a/docs/what/urn.md +++ b/docs/what/urn.md @@ -38,7 +38,8 @@ urn:li:dataset:(urn:li:dataPlatform:hdfs,PageViewEvent,EI) There are a few restrictions when creating an urn: 1. Commas are reserved character in URN fields: `,` -2. Parentheses are reserved characters in URN fields: `( , )` +2. Parentheses are reserved characters in URN fields: `(` or `)` 3. Colons are reserved characters in URN fields: `:` +4. Urn separator UTF-8 character `␟` Please do not use these characters when creating or generating urns. One approach is to use URL encoding for the characters. 
diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java index dc7934ad5cc19..30f5dce379a07 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java @@ -9,6 +9,7 @@ import com.linkedin.util.Pair; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -49,7 +50,8 @@ default List getMCPItems() { * various hooks */ Pair>, List> toUpsertBatchItems( - Map> latestAspects); + Map> latestAspects, + Map> nextVersions); /** * Apply read mutations to batch @@ -227,4 +229,39 @@ static String toAbbreviatedString(Collection items, int max + StringUtils.abbreviate(itemsAbbreviated.toString(), maxWidth) + '}'; } + + /** + * Increment aspect within a batch, tracking both the next aspect version and the most recent + * + * @param changeMCP changeMCP to be incremented + * @param latestAspects latest aspects within the batch + * @param nextVersions next version for the aspects in the batch + * @return the incremented changeMCP + */ + static ChangeMCP incrementBatchVersion( + ChangeMCP changeMCP, + Map> latestAspects, + Map> nextVersions) { + long nextVersion = + nextVersions + .getOrDefault(changeMCP.getUrn().toString(), Collections.emptyMap()) + .getOrDefault(changeMCP.getAspectName(), 0L); + + changeMCP.setPreviousSystemAspect( + latestAspects + .getOrDefault(changeMCP.getUrn().toString(), Collections.emptyMap()) + .getOrDefault(changeMCP.getAspectName(), null)); + + changeMCP.setNextAspectVersion(nextVersion); + + // support inner-batch upserts + latestAspects + .computeIfAbsent(changeMCP.getUrn().toString(), key -> new HashMap<>()) + .put(changeMCP.getAspectName(), changeMCP.getSystemAspect(nextVersion)); + nextVersions + 
.computeIfAbsent(changeMCP.getUrn().toString(), key -> new HashMap<>()) + .put(changeMCP.getAspectName(), nextVersion + 1); + + return changeMCP; + } } diff --git a/li-utils/src/main/java/com/linkedin/metadata/Constants.java b/li-utils/src/main/java/com/linkedin/metadata/Constants.java index f1f096640bc21..077e0e2b666be 100644 --- a/li-utils/src/main/java/com/linkedin/metadata/Constants.java +++ b/li-utils/src/main/java/com/linkedin/metadata/Constants.java @@ -10,6 +10,7 @@ public class Constants { public static final String INTERNAL_DELEGATED_FOR_ACTOR_HEADER_NAME = "X-DataHub-Delegated-For"; public static final String INTERNAL_DELEGATED_FOR_ACTOR_TYPE = "X-DataHub-Delegated-For-"; + public static final String URN_LI_PREFIX = "urn:li:"; public static final String DATAHUB_ACTOR = "urn:li:corpuser:datahub"; // Super user. public static final String SYSTEM_ACTOR = "urn:li:corpuser:__datahub_system"; // DataHub internal service principal. diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py index 7d35791bf1db4..69de61aced0a5 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py @@ -8,7 +8,6 @@ OL_SCHEME_TWEAKS = { "sqlserver": "mssql", - "trino": "presto", "awsathena": "athena", } diff --git a/metadata-ingestion/docs/sources/sigma/sigma_pre.md b/metadata-ingestion/docs/sources/sigma/sigma_pre.md index 382a2fe67b944..433f85a69f907 100644 --- a/metadata-ingestion/docs/sources/sigma/sigma_pre.md +++ b/metadata-ingestion/docs/sources/sigma/sigma_pre.md @@ -16,7 +16,7 @@ This source extracts the following: | Sigma | Datahub | Notes | |------------------------|---------------------------------------------------------------|----------------------------------| | `Workspace` | 
[Container](../../metamodel/entities/container.md) | SubType `"Sigma Workspace"` | -| `Workbook` | [Container](../../metamodel/entities/container.md) | SubType `"Sigma Workbook"` | +| `Workbook` | [Dashboard](../../metamodel/entities/dashboard.md) | SubType `"Sigma Workbook"` | | `Page` | [Dashboard](../../metamodel/entities/dashboard.md) | | | `Element` | [Chart](../../metamodel/entities/chart.md) | | | `Dataset` | [Dataset](../../metamodel/entities/dataset.md) | SubType `"Sigma Dataset"` | diff --git a/metadata-ingestion/examples/mce_files/bootstrap_mce.json b/metadata-ingestion/examples/mce_files/bootstrap_mce.json index f0c4e7ff996ed..bc218e5e8c2d5 100644 --- a/metadata-ingestion/examples/mce_files/bootstrap_mce.json +++ b/metadata-ingestion/examples/mce_files/bootstrap_mce.json @@ -3613,33 +3613,6 @@ }, "systemMetadata": null }, - { - "entityType": "post", - "entityUrn": "urn:li:post:f3a68539-f7e4-4c41-a4fd-9e57c085d8de", - "changeType": "UPSERT", - "aspectName": "postInfo", - "aspect": { - "json": { - "type": "HOME_PAGE_ANNOUNCEMENT", - "content": { - "title": "Join Metadata & AI Summit 2024", - "type": "LINK", - "link": "http://www.acryldata.io/conference?utm_source=datahub_quickstart&utm_medium=metadata_ai_2024&utm_campaign=pinned_announcement", - "media": { - "type": "IMAGE", - "location": "https://formulatedby.com/wp-content/uploads/2024/07/0193320a6d93e7508d1598f7b24662f75a87e92f-352x456-1.svg" - } - }, - "created": 1712547125049, - "lastModified": 1712547125049 - } - }, - "systemMetadata": { - "lastObserved": 1712548844816, - "runId": "datahub-2024_04_08-13_00_44", - "lastRunId": "no-run-id-provided" - } - }, { "entityType": "post", "entityUrn": "urn:li:post:f3a68539-f7e4-4c41-a4fd-9e57c085d8dd", diff --git a/metadata-ingestion/src/datahub/configuration/kafka_consumer_config.py b/metadata-ingestion/src/datahub/configuration/kafka_consumer_config.py index d3ff5998d3e79..cac6bb4996391 100644 --- 
a/metadata-ingestion/src/datahub/configuration/kafka_consumer_config.py +++ b/metadata-ingestion/src/datahub/configuration/kafka_consumer_config.py @@ -30,6 +30,9 @@ def _resolve_oauth_callback(self) -> None: call_back = self.get_call_back_attribute() - assert call_back # to silent lint + assert isinstance(call_back, str), ( + "oauth_cb must be a string representing python function reference " + "in the format :." + ) # Set the callback self._config[CallableConsumerConfig.CALLBACK_ATTRIBUTE] = import_path(call_back) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index 76c2fbf48ccab..16a5268a2dea7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -95,6 +95,10 @@ def cleanup(config: BigQueryV2Config) -> None: "Optionally enabled via `classification.enabled`", supported=True, ) +@capability( + SourceCapability.PARTITION_SUPPORT, + "Enabled by default, partition keys and clustering keys are supported.", +) class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource): def __init__(self, ctx: PipelineContext, config: BigQueryV2Config): super().__init__(config, ctx) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py index 7e8b2931282ff..06842da67f76c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py @@ -8,7 +8,7 @@ from datahub.ingestion.api.report import Report from datahub.ingestion.glossary.classification_mixin import ClassificationReportMixin -from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport +from datahub.ingestion.source.sql.sql_report import SQLSourceReport 
from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport from datahub.ingestion.source_report.time_window import BaseTimeWindowReport from datahub.sql_parsing.sql_parsing_aggregator import SqlAggregatorReport @@ -77,7 +77,7 @@ class BigQueryQueriesExtractorReport(Report): @dataclass class BigQueryV2Report( - ProfilingSqlReport, + SQLSourceReport, IngestionStageReport, BaseTimeWindowReport, ClassificationReportMixin, diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py index 58317b108bef4..3ce34be8dc89d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py @@ -118,7 +118,6 @@ class BigqueryTable(BaseTable): active_billable_bytes: Optional[int] = None long_term_billable_bytes: Optional[int] = None partition_info: Optional[PartitionInfo] = None - columns_ignore_from_profiling: List[str] = field(default_factory=list) external: bool = False constraints: List[BigqueryTableConstraint] = field(default_factory=list) table_type: Optional[str] = None @@ -152,6 +151,21 @@ class BigqueryDataset: snapshots: List[BigqueryTableSnapshot] = field(default_factory=list) columns: List[BigqueryColumn] = field(default_factory=list) + # Some INFORMATION_SCHEMA views are not available for BigLake tables + # based on Amazon S3 and Blob Storage data. 
+ # https://cloud.google.com/bigquery/docs/omni-introduction#limitations + # Omni Locations - https://cloud.google.com/bigquery/docs/omni-introduction#locations + def is_biglake_dataset(self) -> bool: + return self.location is not None and self.location.lower().startswith( + ("aws-", "azure-") + ) + + def supports_table_constraints(self) -> bool: + return not self.is_biglake_dataset() + + def supports_table_partitions(self) -> bool: + return not self.is_biglake_dataset() + @dataclass class BigqueryProject: @@ -541,18 +555,26 @@ def get_table_constraints_for_dataset( table_name=constraint.table_name, type=constraint.constraint_type, field_path=constraint.column_name, - referenced_project_id=constraint.referenced_catalog - if constraint.constraint_type == "FOREIGN KEY" - else None, - referenced_dataset=constraint.referenced_schema - if constraint.constraint_type == "FOREIGN KEY" - else None, - referenced_table_name=constraint.referenced_table - if constraint.constraint_type == "FOREIGN KEY" - else None, - referenced_column_name=constraint.referenced_column - if constraint.constraint_type == "FOREIGN KEY" - else None, + referenced_project_id=( + constraint.referenced_catalog + if constraint.constraint_type == "FOREIGN KEY" + else None + ), + referenced_dataset=( + constraint.referenced_schema + if constraint.constraint_type == "FOREIGN KEY" + else None + ), + referenced_table_name=( + constraint.referenced_table + if constraint.constraint_type == "FOREIGN KEY" + else None + ), + referenced_column_name=( + constraint.referenced_column + if constraint.constraint_type == "FOREIGN KEY" + else None + ), ) ) self.report.num_get_table_constraints_for_dataset_api_requests += 1 diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py index 6f3008ccfd692..4a3b47f6b543a 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py @@ -498,7 +498,10 @@ def _process_schema( report=self.report, rate_limiter=rate_limiter, ) - if self.config.include_table_constraints: + if ( + self.config.include_table_constraints + and bigquery_dataset.supports_table_constraints() + ): constraints = self.schema_api.get_table_constraints_for_dataset( project_id=project_id, dataset_name=dataset_name, report=self.report ) @@ -595,18 +598,6 @@ def _process_schema( dataset_name=dataset_name, ) - # This method is used to generate the ignore list for datatypes the profiler doesn't support we have to do it here - # because the profiler doesn't have access to columns - def generate_profile_ignore_list(self, columns: List[BigqueryColumn]) -> List[str]: - ignore_list: List[str] = [] - for column in columns: - if not column.data_type or any( - word in column.data_type.lower() - for word in ["array", "struct", "geography", "json"] - ): - ignore_list.append(column.field_path) - return ignore_list - def _process_table( self, table: BigqueryTable, @@ -628,15 +619,6 @@ def _process_table( ) table.column_count = len(columns) - # We only collect profile ignore list if profiling is enabled and profile_table_level_only is false - if ( - self.config.is_profiling_enabled() - and not self.config.profiling.profile_table_level_only - ): - table.columns_ignore_from_profiling = self.generate_profile_ignore_list( - columns - ) - if not table.column_count: logger.warning( f"Table doesn't have any column or unable to get columns for table: {table_identifier}" @@ -1157,9 +1139,11 @@ def gen_schema_metadata( # fields=[], fields=self.gen_schema_fields( columns, - table.constraints - if (isinstance(table, BigqueryTable) and table.constraints) - else [], + ( + table.constraints + if (isinstance(table, BigqueryTable) and table.constraints) + else [] + ), ), 
foreignKeys=foreign_keys if foreign_keys else None, ) @@ -1180,13 +1164,9 @@ def get_tables_for_dataset( ) -> Iterable[BigqueryTable]: # In bigquery there is no way to query all tables in a Project id with PerfTimer() as timer: - # PARTITIONS INFORMATION_SCHEMA view is not available for BigLake tables - # based on Amazon S3 and Blob Storage data. - # https://cloud.google.com/bigquery/docs/omni-introduction#limitations - # Omni Locations - https://cloud.google.com/bigquery/docs/omni-introduction#locations - with_partitions = self.config.have_table_data_read_permission and not ( - dataset.location - and dataset.location.lower().startswith(("aws-", "azure-")) + with_partitions = ( + self.config.have_table_data_read_permission + and dataset.supports_table_partitions() ) # Partitions view throw exception if we try to query partition info for too many tables diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py index 6af8166fbf70c..182ae2265cb16 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py @@ -166,12 +166,6 @@ def get_workunits( normalized_table_name = BigqueryTableIdentifier( project_id=project_id, dataset=dataset, table=table.name ).get_table_name() - for column in table.columns_ignore_from_profiling: - # Profiler has issues with complex types (array, struct, geography, json), so we deny those types from profiling - # We also filter columns without data type as it means that column is part of a complex type. 
- self.config.profile_pattern.deny.append( - f"^{normalized_table_name}.{column}$" - ) if table.external and not self.config.profiling.profile_external_tables: self.report.profiling_skipped_other[f"{project_id}.{dataset}"] += 1 diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_reporting.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_reporting.py index ccc685382f374..926dbd42eb267 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_reporting.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_reporting.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from datetime import datetime -from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport +from datahub.ingestion.source.sql.sql_report import SQLSourceReport from datahub.ingestion.source.state.stale_entity_removal_handler import ( StaleEntityRemovalSourceReport, ) @@ -10,7 +10,7 @@ @dataclass class DremioSourceReport( - ProfilingSqlReport, StaleEntityRemovalSourceReport, IngestionStageReport + SQLSourceReport, StaleEntityRemovalSourceReport, IngestionStageReport ): num_containers_failed: int = 0 num_datasets_failed: int = 0 diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py index d175fce04a52c..f7d783cd3dec0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py @@ -7,6 +7,7 @@ import functools import json import logging +import re import threading import traceback import unittest.mock @@ -55,7 +56,7 @@ Cardinality, convert_to_cardinality, ) -from datahub.ingestion.source.sql.sql_common import SQLSourceReport +from datahub.ingestion.source.sql.sql_report import SQLSourceReport from datahub.metadata.com.linkedin.pegasus2avro.schema import EditableSchemaMetadata from datahub.metadata.schema_classes import ( 
DatasetFieldProfileClass, @@ -123,6 +124,8 @@ _datasource_connection_injection_lock = threading.Lock() +NORMALIZE_TYPE_PATTERN = re.compile(r"^(.*?)(?:[\[<(].*)?$") + @contextlib.contextmanager def _inject_connection_into_datasource(conn: Connection) -> Iterator[None]: @@ -165,11 +168,9 @@ def get_column_unique_count_dh_patch(self: SqlAlchemyDataset, column: str) -> in return convert_to_json_serializable(element_values.fetchone()[0]) elif self.engine.dialect.name.lower() == BIGQUERY: element_values = self.engine.execute( - sa.select( - [ - sa.func.coalesce(sa.text(f"APPROX_COUNT_DISTINCT(`{column}`)")), - ] - ).select_from(self._table) + sa.select(sa.func.APPROX_COUNT_DISTINCT(sa.column(column))).select_from( + self._table + ) ) return convert_to_json_serializable(element_values.fetchone()[0]) elif self.engine.dialect.name.lower() == SNOWFLAKE: @@ -378,6 +379,9 @@ def _get_columns_to_profile(self) -> List[str]: f"{self.dataset_name}.{col}" ): ignored_columns_by_pattern.append(col) + # We try to ignore nested columns as well + elif not self.config.profile_nested_fields and "." 
 in col: + ignored_columns_by_pattern.append(col) elif col_dict.get("type") and self._should_ignore_column(col_dict["type"]): ignored_columns_by_type.append(col) else: @@ -407,9 +411,18 @@ def _get_columns_to_profile(self) -> List[str]: return columns_to_profile def _should_ignore_column(self, sqlalchemy_type: sa.types.TypeEngine) -> bool: - return str(sqlalchemy_type) in _get_column_types_to_ignore( - self.dataset.engine.dialect.name - ) + # We don't profile columns with None types + if str(sqlalchemy_type) == "NULL": + return True + + sql_type = str(sqlalchemy_type) + + match = re.match(NORMALIZE_TYPE_PATTERN, sql_type) + + if match: + sql_type = match.group(1) + + return sql_type in _get_column_types_to_ignore(self.dataset.engine.dialect.name) @_run_with_query_combiner def _get_column_type(self, column_spec: _SingleColumnSpec, column: str) -> None: @@ -1397,6 +1410,8 @@ def _get_ge_dataset( def _get_column_types_to_ignore(dialect_name: str) -> List[str]: if dialect_name.lower() == POSTGRESQL: return ["JSON"] + elif dialect_name.lower() == BIGQUERY: + return ["ARRAY", "STRUCT", "GEOGRAPHY", "JSON"] return [] diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py b/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py index c20506e36a844..42d0def0a46e7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py @@ -125,12 +125,16 @@ class GEProfilingConfig(GEProfilingBaseConfig): profile_table_size_limit: Optional[int] = Field( default=5, - description="Profile tables only if their size is less then specified GBs. If set to `null`, no limit on the size of tables to profile. Supported only in `snowflake` and `BigQuery`", + description="Profile tables only if their size is less than specified GBs. If set to `null`, " + "no limit on the size of tables to profile. 
Supported only in `snowflake` and `BigQuery`. " + "Supported for `oracle` based on calculated size from gathered stats.", ) profile_table_row_limit: Optional[int] = Field( default=5000000, - description="Profile tables only if their row count is less then specified count. If set to `null`, no limit on the row count of tables to profile. Supported only in `snowflake` and `BigQuery`", + description="Profile tables only if their row count is less than specified count. If set to `null`, " + "no limit on the row count of tables to profile. Supported only in `snowflake` and `BigQuery`. " + "Supported for `oracle` based on gathered stats.", ) profile_table_row_count_estimate_only: bool = Field( @@ -184,6 +188,11 @@ class GEProfilingConfig(GEProfilingBaseConfig): ), ) + profile_nested_fields: bool = Field( + default=False, + description="Whether to profile complex types like structs, arrays and maps. ", + ) + @pydantic.root_validator(pre=True) def deprecate_bigquery_temp_table_schema(cls, values): # TODO: Update docs to remove mention of this field. 
diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka/kafka.py b/metadata-ingestion/src/datahub/ingestion/source/kafka/kafka.py index 06d929774240b..e57dc853a83c6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka/kafka.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka/kafka.py @@ -157,7 +157,9 @@ def get_kafka_consumer( if CallableConsumerConfig.is_callable_config(connection.consumer_config): # As per documentation, we need to explicitly call the poll method to make sure OAuth callback gets executed # https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#kafka-client-configuration + logger.debug("Initiating polling for kafka consumer") consumer.poll(timeout=30) + logger.debug("Initiated polling for kafka consumer") return consumer diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py index ff28ed2c5e849..2748f2a588a93 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py @@ -3,7 +3,7 @@ from typing import Dict, Optional from datahub.ingestion.glossary.classification_mixin import ClassificationReportMixin -from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport +from datahub.ingestion.source.sql.sql_report import SQLSourceReport from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport from datahub.ingestion.source_report.time_window import BaseTimeWindowReport from datahub.sql_parsing.sql_parsing_aggregator import SqlAggregatorReport @@ -14,7 +14,7 @@ @dataclass class RedshiftReport( - ProfilingSqlReport, + SQLSourceReport, IngestionStageReport, BaseTimeWindowReport, ClassificationReportMixin, diff --git a/metadata-ingestion/src/datahub/ingestion/source/sigma/data_classes.py b/metadata-ingestion/src/datahub/ingestion/source/sigma/data_classes.py 
index 922b0be3b4a93..5a657d804cb7b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sigma/data_classes.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sigma/data_classes.py @@ -80,6 +80,7 @@ class Workbook(BaseModel): path: str latestVersion: int workspaceId: Optional[str] = None + description: Optional[str] = None pages: List[Page] = [] badge: Optional[str] = None diff --git a/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py b/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py index dd4b65a2cbdf2..e96eeb58d96ef 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py @@ -4,7 +4,12 @@ import datahub.emitter.mce_builder as builder from datahub.configuration.common import ConfigurationError from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.emitter.mcp_builder import add_entity_to_container, gen_containers +from datahub.emitter.mcp_builder import ( + add_entity_to_container, + add_owner_to_entity_wu, + add_tags_to_entity_wu, + gen_containers, +) from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SourceCapability, @@ -59,12 +64,14 @@ UpstreamLineage, ) from datahub.metadata.schema_classes import ( + AuditStampClass, BrowsePathEntryClass, BrowsePathsV2Class, ChangeAuditStampsClass, ChartInfoClass, DashboardInfoClass, DataPlatformInstanceClass, + EdgeClass, GlobalTagsClass, InputFieldClass, InputFieldsClass, @@ -74,6 +81,7 @@ SchemaFieldClass, SchemaFieldDataTypeClass, StringTypeClass, + SubTypesClass, TagAssociationClass, ) from datahub.sql_parsing.sqlglot_lineage import create_lineage_sql_parsed_result @@ -257,11 +265,6 @@ def _gen_entity_browsepath_aspect( entries = [ BrowsePathEntryClass(id=parent_entity_urn, urn=parent_entity_urn) ] + [BrowsePathEntryClass(id=path) for path in paths] - if self.config.platform_instance: - urn = 
builder.make_dataplatform_instance_urn( - self.platform, self.config.platform_instance - ) - entries = [BrowsePathEntryClass(id=urn, urn=urn)] + entries return MetadataChangeProposalWrapper( entityUrn=entity_urn, aspect=BrowsePathsV2Class(entries), @@ -424,11 +427,11 @@ def _gen_elements_workunit( elements: List[Element], workbook: Workbook, all_input_fields: List[InputFieldClass], + paths: List[str], ) -> Iterable[MetadataWorkUnit]: """ Map Sigma page element to Datahub Chart """ - for element in elements: chart_urn = builder.make_chart_urn( platform=self.platform, @@ -459,11 +462,14 @@ def _gen_elements_workunit( ), ).as_workunit() - yield from add_entity_to_container( - container_key=self._gen_workbook_key(workbook.workbookId), - entity_type="chart", - entity_urn=chart_urn, - ) + if workbook.workspaceId: + yield self._gen_entity_browsepath_aspect( + entity_urn=chart_urn, + parent_entity_urn=builder.make_container_urn( + self._gen_workspace_key(workbook.workspaceId) + ), + paths=paths + [workbook.name], + ) # Add sigma dataset's upstream dataset urn mapping for dataset_urn, upstream_dataset_urns in inputs.items(): @@ -494,7 +500,9 @@ def _gen_elements_workunit( all_input_fields.extend(element_input_fields) - def _gen_pages_workunit(self, workbook: Workbook) -> Iterable[MetadataWorkUnit]: + def _gen_pages_workunit( + self, workbook: Workbook, paths: List[str] + ) -> Iterable[MetadataWorkUnit]: """ Map Sigma workbook page to Datahub dashboard """ @@ -505,20 +513,23 @@ def _gen_pages_workunit(self, workbook: Workbook) -> Iterable[MetadataWorkUnit]: yield self._gen_dashboard_info_workunit(page) - yield from add_entity_to_container( - container_key=self._gen_workbook_key(workbook.workbookId), - entity_type="dashboard", - entity_urn=dashboard_urn, - ) - dpi_aspect = self._gen_dataplatform_instance_aspect(dashboard_urn) if dpi_aspect: yield dpi_aspect all_input_fields: List[InputFieldClass] = [] + if workbook.workspaceId: + yield self._gen_entity_browsepath_aspect( + 
entity_urn=dashboard_urn, + parent_entity_urn=builder.make_container_urn( + self._gen_workspace_key(workbook.workspaceId) + ), + paths=paths + [workbook.name], + ) + yield from self._gen_elements_workunit( - page.elements, workbook, all_input_fields + page.elements, workbook, all_input_fields, paths ) yield MetadataChangeProposalWrapper( @@ -531,42 +542,89 @@ def _gen_workbook_workunit(self, workbook: Workbook) -> Iterable[MetadataWorkUni Map Sigma Workbook to Datahub container """ owner_username = self.sigma_api.get_user_name(workbook.createdBy) - workbook_key = self._gen_workbook_key(workbook.workbookId) - yield from gen_containers( - container_key=workbook_key, - name=workbook.name, - sub_types=[BIContainerSubTypes.SIGMA_WORKBOOK], - parent_container_key=( - self._gen_workspace_key(workbook.workspaceId) - if workbook.workspaceId - else None + + dashboard_urn = self._gen_dashboard_urn(workbook.workbookId) + + yield self._gen_entity_status_aspect(dashboard_urn) + + lastModified = AuditStampClass( + time=int(workbook.updatedAt.timestamp() * 1000), + actor="urn:li:corpuser:datahub", + ) + created = AuditStampClass( + time=int(workbook.createdAt.timestamp() * 1000), + actor="urn:li:corpuser:datahub", + ) + + dashboard_info_cls = DashboardInfoClass( + title=workbook.name, + description=workbook.description if workbook.description else "", + dashboards=[ + EdgeClass( + destinationUrn=self._gen_dashboard_urn(page.get_urn_part()), + sourceUrn=dashboard_urn, + ) + for page in workbook.pages + ], + externalUrl=workbook.url, + lastModified=ChangeAuditStampsClass( + created=created, lastModified=lastModified ), - extra_properties={ + customProperties={ "path": workbook.path, "latestVersion": str(workbook.latestVersion), }, - owner_urn=( - builder.make_user_urn(owner_username) - if self.config.ingest_owner and owner_username - else None - ), - external_url=workbook.url, - tags=[workbook.badge] if workbook.badge else None, - created=int(workbook.createdAt.timestamp() * 1000), 
- last_modified=int(workbook.updatedAt.timestamp() * 1000), ) + yield MetadataChangeProposalWrapper( + entityUrn=dashboard_urn, aspect=dashboard_info_cls + ).as_workunit() + + # Set subtype + yield MetadataChangeProposalWrapper( + entityUrn=dashboard_urn, + aspect=SubTypesClass(typeNames=[BIContainerSubTypes.SIGMA_WORKBOOK]), + ).as_workunit() + + # Ownership + owner_urn = ( + builder.make_user_urn(owner_username) + if self.config.ingest_owner and owner_username + else None + ) + if owner_urn: + yield from add_owner_to_entity_wu( + entity_type="dashboard", + entity_urn=dashboard_urn, + owner_urn=owner_urn, + ) + + # Tags + tags = [workbook.badge] if workbook.badge else None + if tags: + yield from add_tags_to_entity_wu( + entity_type="dashboard", + entity_urn=dashboard_urn, + tags=sorted(tags), + ) paths = workbook.path.split("/")[1:] - if len(paths) > 0 and workbook.workspaceId: + if workbook.workspaceId: yield self._gen_entity_browsepath_aspect( - entity_urn=builder.make_container_urn(workbook_key), + entity_urn=dashboard_urn, parent_entity_urn=builder.make_container_urn( self._gen_workspace_key(workbook.workspaceId) ), - paths=paths, + paths=paths + [workbook.name], ) - yield from self._gen_pages_workunit(workbook) + if len(paths) == 0: + yield from add_entity_to_container( + container_key=self._gen_workspace_key(workbook.workspaceId), + entity_type="dashboard", + entity_urn=dashboard_urn, + ) + + yield from self._gen_pages_workunit(workbook, paths) def _gen_sigma_dataset_upstream_lineage_workunit( self, diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py index 80b6be36e5ffa..b5f56f99431f9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py @@ -5,7 +5,7 @@ from datahub.ingestion.api.report import Report from 
datahub.ingestion.glossary.classification_mixin import ClassificationReportMixin from datahub.ingestion.source.snowflake.constants import SnowflakeEdition -from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport +from datahub.ingestion.source.sql.sql_report import SQLSourceReport from datahub.ingestion.source.state.stateful_ingestion_base import ( StatefulIngestionReport, ) @@ -59,7 +59,7 @@ class SnowflakeUsageReport: @dataclass -class SnowflakeReport(ProfilingSqlReport, BaseTimeWindowReport): +class SnowflakeReport(SQLSourceReport, BaseTimeWindowReport): num_table_to_table_edges_scanned: int = 0 num_table_to_view_edges_scanned: int = 0 num_view_to_table_edges_scanned: int = 0 diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/job_models.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/job_models.py index 21e7fad334331..5107a4e38f64d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/job_models.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/job_models.py @@ -101,6 +101,7 @@ class StoredProcedure: flow: Union[MSSQLJob, MSSQLProceduresContainer] type: str = "STORED_PROCEDURE" source: str = "mssql" + code: Optional[str] = None @property def full_type(self) -> str: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py index c19b22a8622ca..7a2dbda8b4a93 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py @@ -24,6 +24,8 @@ platform_name, support_status, ) +from datahub.ingestion.api.source import StructuredLogLevel +from datahub.ingestion.api.source_helpers import auto_workunit from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.sql.mssql.job_models import ( JobStep, @@ -36,6 +38,9 @@ ProcedureParameter, StoredProcedure, ) +from 
datahub.ingestion.source.sql.mssql.stored_procedure_lineage import ( + generate_procedure_lineage, +) from datahub.ingestion.source.sql.sql_common import ( SQLAlchemySource, SqlWorkUnit, @@ -45,12 +50,14 @@ BasicSQLAlchemyConfig, make_sqlalchemy_uri, ) +from datahub.ingestion.source.sql.sql_report import SQLSourceReport from datahub.metadata.schema_classes import ( BooleanTypeClass, NumberTypeClass, StringTypeClass, UnionTypeClass, ) +from datahub.utilities.file_backed_collections import FileBackedList logger: logging.Logger = logging.getLogger(__name__) @@ -72,6 +79,11 @@ class SQLServerConfig(BasicSQLAlchemyConfig): include_stored_procedures_code: bool = Field( default=True, description="Include information about object code." ) + procedure_pattern: AllowDenyPattern = Field( + default=AllowDenyPattern.allow_all(), + description="Regex patterns for stored procedures to filter in ingestion. " + "Specify regex to match the entire procedure name in database.schema.procedure_name format. e.g. to match all procedures starting with customer in Customer database and public schema, use the regex 'Customer.public.customer.*'", + ) include_jobs: bool = Field( default=True, description="Include ingest of MSSQL Jobs. Requires access to the 'msdb' and 'sys' schema.", @@ -99,6 +111,10 @@ class SQLServerConfig(BasicSQLAlchemyConfig): default=False, description="Enable to convert the SQL Server assets urns to lowercase", ) + include_lineage: bool = Field( + default=True, + description="Enable lineage extraction for stored procedures", + ) @pydantic.validator("uri_args") def passwords_match(cls, v, values, **kwargs): @@ -154,6 +170,8 @@ class SQLServerSource(SQLAlchemySource): If you do use pyodbc, make sure to change the source type from `mssql` to `mssql-odbc` so that we pull in the right set of dependencies. This will be needed in most cases where encryption is required, such as managed SQL Server services in Azure. 
""" + report: SQLSourceReport + def __init__(self, config: SQLServerConfig, ctx: PipelineContext): super().__init__(config, ctx, "mssql") # Cache the table and column descriptions @@ -161,6 +179,7 @@ def __init__(self, config: SQLServerConfig, ctx: PipelineContext): self.current_database = None self.table_descriptions: Dict[str, str] = {} self.column_descriptions: Dict[str, str] = {} + self.stored_procedures: FileBackedList[StoredProcedure] = FileBackedList() if self.config.include_descriptions: for inspector in self.get_inspectors(): db_name: str = self.get_db_name(inspector) @@ -374,7 +393,7 @@ def loop_jobs( def loop_job_steps( self, job: MSSQLJob, job_steps: Dict[str, Any] ) -> Iterable[MetadataWorkUnit]: - for step_id, step_data in job_steps.items(): + for _step_id, step_data in job_steps.items(): step = JobStep( job_name=job.formatted_name, step_name=step_data["step_name"], @@ -405,44 +424,57 @@ def loop_stored_procedures( # noqa: C901 data_flow = MSSQLDataFlow(entity=mssql_default_job) with inspector.engine.connect() as conn: procedures_data_list = self._get_stored_procedures(conn, db_name, schema) - procedures = [ - StoredProcedure(flow=mssql_default_job, **procedure_data) - for procedure_data in procedures_data_list - ] + procedures: List[StoredProcedure] = [] + for procedure_data in procedures_data_list: + procedure_full_name = f"{db_name}.{schema}.{procedure_data['name']}" + if not self.config.procedure_pattern.allowed(procedure_full_name): + self.report.report_dropped(procedure_full_name) + continue + procedures.append( + StoredProcedure(flow=mssql_default_job, **procedure_data) + ) + if procedures: yield from self.construct_flow_workunits(data_flow=data_flow) for procedure in procedures: - upstream = self._get_procedure_upstream(conn, procedure) - downstream = self._get_procedure_downstream(conn, procedure) - data_job = MSSQLDataJob( - entity=procedure, - ) - # TODO: because of this upstream and downstream are more dependencies, - # can't be used as 
DataJobInputOutput. - # Should be reorganized into lineage. - data_job.add_property("procedure_depends_on", str(upstream.as_property)) - data_job.add_property( - "depending_on_procedure", str(downstream.as_property) - ) - procedure_definition, procedure_code = self._get_procedure_code( - conn, procedure - ) - if procedure_definition: - data_job.add_property("definition", procedure_definition) - if sql_config.include_stored_procedures_code and procedure_code: - data_job.add_property("code", procedure_code) - procedure_inputs = self._get_procedure_inputs(conn, procedure) - properties = self._get_procedure_properties(conn, procedure) - data_job.add_property( - "input parameters", str([param.name for param in procedure_inputs]) - ) - for param in procedure_inputs: - data_job.add_property( - f"parameter {param.name}", str(param.properties) - ) - for property_name, property_value in properties.items(): - data_job.add_property(property_name, str(property_value)) - yield from self.construct_job_workunits(data_job) + yield from self._process_stored_procedure(conn, procedure) + + def _process_stored_procedure( + self, conn: Connection, procedure: StoredProcedure + ) -> Iterable[MetadataWorkUnit]: + upstream = self._get_procedure_upstream(conn, procedure) + downstream = self._get_procedure_downstream(conn, procedure) + data_job = MSSQLDataJob( + entity=procedure, + ) + # TODO: because of this upstream and downstream are more dependencies, + # can't be used as DataJobInputOutput. + # Should be reorganized into lineage. 
+ data_job.add_property("procedure_depends_on", str(upstream.as_property)) + data_job.add_property("depending_on_procedure", str(downstream.as_property)) + procedure_definition, procedure_code = self._get_procedure_code(conn, procedure) + procedure.code = procedure_code + if procedure_definition: + data_job.add_property("definition", procedure_definition) + if procedure_code and self.config.include_stored_procedures_code: + data_job.add_property("code", procedure_code) + procedure_inputs = self._get_procedure_inputs(conn, procedure) + properties = self._get_procedure_properties(conn, procedure) + data_job.add_property( + "input parameters", str([param.name for param in procedure_inputs]) + ) + for param in procedure_inputs: + data_job.add_property(f"parameter {param.name}", str(param.properties)) + for property_name, property_value in properties.items(): + data_job.add_property(property_name, str(property_value)) + if self.config.include_lineage: + # These will be used to construct lineage + self.stored_procedures.append(procedure) + yield from self.construct_job_workunits( + data_job, + # For stored procedure lineage is ingested later + include_lineage=False, + ) @staticmethod def _get_procedure_downstream( @@ -546,8 +578,8 @@ def _get_procedure_code( code_list.append(row["Text"]) if code_slice_text in re.sub(" +", " ", row["Text"].lower()).strip(): code_slice_index = index - definition = "\n".join(code_list[:code_slice_index]) - code = "\n".join(code_list[code_slice_index:]) + definition = "".join(code_list[:code_slice_index]) + code = "".join(code_list[code_slice_index:]) except ResourceClosedError: logger.warning( "Connection was closed from procedure '%s'", @@ -602,16 +634,18 @@ def _get_stored_procedures( def construct_job_workunits( self, data_job: MSSQLDataJob, + include_lineage: bool = True, ) -> Iterable[MetadataWorkUnit]: yield MetadataChangeProposalWrapper( entityUrn=data_job.urn, aspect=data_job.as_datajob_info_aspect, ).as_workunit() - yield 
MetadataChangeProposalWrapper( - entityUrn=data_job.urn, - aspect=data_job.as_datajob_input_output_aspect, - ).as_workunit() + if include_lineage: + yield MetadataChangeProposalWrapper( + entityUrn=data_job.urn, + aspect=data_job.as_datajob_input_output_aspect, + ).as_workunit() # TODO: Add SubType when it appear def construct_flow_workunits( @@ -664,3 +698,58 @@ def get_identifier( if self.config.convert_urns_to_lowercase else qualified_table_name ) + + def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: + yield from super().get_workunits_internal() + + # This is done at the end so that we will have access to tables + # from all databases in schema_resolver and discovered_tables + for procedure in self.stored_procedures: + with self.report.report_exc( + message="Failed to parse stored procedure lineage", + context=procedure.full_name, + level=StructuredLogLevel.WARN, + ): + yield from auto_workunit( + generate_procedure_lineage( + schema_resolver=self.schema_resolver, + procedure=procedure, + procedure_job_urn=MSSQLDataJob(entity=procedure).urn, + is_temp_table=self.is_temp_table, + ) + ) + + def is_temp_table(self, name: str) -> bool: + try: + parts = name.split(".") + table_name = parts[-1] + schema_name = parts[-2] + db_name = parts[-3] + + if table_name.startswith("#"): + return True + + # This is also a temp table if + # 1. this name would be allowed by the dataset patterns, and + # 2. we have a list of discovered tables, and + # 3. 
it's not in the discovered tables list + if ( + self.config.database_pattern.allowed(db_name) + and self.config.schema_pattern.allowed(schema_name) + and self.config.table_pattern.allowed(name) + and self.standardize_identifier_case(name) + not in self.discovered_datasets + ): + logger.debug(f"inferred as temp table {name}") + return True + + except Exception: + logger.warning(f"Error parsing table name {name} ") + return False + + def standardize_identifier_case(self, table_ref_str: str) -> str: + return ( + table_ref_str.lower() + if self.config.convert_urns_to_lowercase + else table_ref_str + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py new file mode 100644 index 0000000000000..b979a270a5528 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py @@ -0,0 +1,84 @@ +import logging +from typing import Callable, Iterable, Optional + +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.source.sql.mssql.job_models import StoredProcedure +from datahub.metadata.schema_classes import DataJobInputOutputClass +from datahub.sql_parsing.datajob import to_datajob_input_output +from datahub.sql_parsing.schema_resolver import SchemaResolver +from datahub.sql_parsing.split_statements import split_statements +from datahub.sql_parsing.sql_parsing_aggregator import ( + ObservedQuery, + SqlParsingAggregator, +) + +logger = logging.getLogger(__name__) + + +def parse_procedure_code( + *, + schema_resolver: SchemaResolver, + default_db: Optional[str], + default_schema: Optional[str], + code: str, + is_temp_table: Callable[[str], bool], + raise_: bool = False, +) -> Optional[DataJobInputOutputClass]: + aggregator = SqlParsingAggregator( + platform=schema_resolver.platform, + env=schema_resolver.env, + schema_resolver=schema_resolver, + generate_lineage=True, + 
generate_queries=False, + generate_usage_statistics=False, + generate_operations=False, + generate_query_subject_fields=False, + generate_query_usage_statistics=False, + is_temp_table=is_temp_table, + ) + for query in split_statements(code): + # TODO: We should take into account `USE x` statements. + aggregator.add_observed_query( + observed=ObservedQuery( + default_db=default_db, + default_schema=default_schema, + query=query, + ) + ) + if aggregator.report.num_observed_queries_failed and raise_: + logger.info(aggregator.report.as_string()) + raise ValueError( + f"Failed to parse {aggregator.report.num_observed_queries_failed} queries." + ) + + mcps = list(aggregator.gen_metadata()) + return to_datajob_input_output( + mcps=mcps, + ignore_extra_mcps=True, + ) + + +# Is procedure handling generic enough to be added to SqlParsingAggregator? +def generate_procedure_lineage( + *, + schema_resolver: SchemaResolver, + procedure: StoredProcedure, + procedure_job_urn: str, + is_temp_table: Callable[[str], bool] = lambda _: False, + raise_: bool = False, +) -> Iterable[MetadataChangeProposalWrapper]: + if procedure.code: + datajob_input_output = parse_procedure_code( + schema_resolver=schema_resolver, + default_db=procedure.db, + default_schema=procedure.schema, + code=procedure.code, + is_temp_table=is_temp_table, + raise_=raise_, + ) + + if datajob_input_output: + yield MetadataChangeProposalWrapper( + entityUrn=procedure_job_urn, + aspect=datajob_input_output, + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py index 766b704d6ffaf..52db3cd11a759 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py @@ -1,3 +1,4 @@ +import datetime import logging import re @@ -631,3 +632,52 @@ def get_workunits(self): clear=False, ): return super().get_workunits() + + def generate_profile_candidates( + self, + 
inspector: Inspector, + threshold_time: Optional[datetime.datetime], + schema: str, + ) -> Optional[List[str]]: + tables_table_name = ( + "ALL_TABLES" if self.config.data_dictionary_mode == "ALL" else "DBA_TABLES" + ) + + # If stats are available, they are used even if they are stale. + # Assuming that the table would typically grow over time, this will ensure to filter + # large tables known at stats collection time from profiling candidates. + # If stats are not available (NULL), such tables are not filtered and are considered + # as profiling candidates. + cursor = inspector.bind.execute( + sql.text( + f"""SELECT + t.OWNER, + t.TABLE_NAME, + t.NUM_ROWS, + t.LAST_ANALYZED, + COALESCE(t.NUM_ROWS * t.AVG_ROW_LEN, 0) / (1024 * 1024 * 1024) AS SIZE_GB + FROM {tables_table_name} t + WHERE t.OWNER = :owner + AND (t.NUM_ROWS < :table_row_limit OR t.NUM_ROWS IS NULL) + AND COALESCE(t.NUM_ROWS * t.AVG_ROW_LEN, 0) / (1024 * 1024 * 1024) < :table_size_limit + """ + ), + dict( + owner=inspector.dialect.denormalize_name(schema), + table_row_limit=self.config.profiling.profile_table_row_limit, + table_size_limit=self.config.profiling.profile_table_size_limit, + ), + ) + + TABLE_NAME_COL_LOC = 1 + return [ + self.get_identifier( + schema=schema, + entity=inspector.dialect.normalize_name(row[TABLE_NAME_COL_LOC]) + or _raise_err( + ValueError(f"Invalid table name: {row[TABLE_NAME_COL_LOC]}") + ), + inspector=inspector, + ) + for row in cursor + ] diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index e5779791ed412..41ffcb95a7cc4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -51,7 +51,6 @@ from datahub.ingestion.glossary.classification_mixin import ( SAMPLE_SIZE_MULTIPLIER, ClassificationHandler, - ClassificationReportMixin, ) from datahub.ingestion.source.common.data_reader 
import DataReader from datahub.ingestion.source.common.subtypes import ( @@ -59,6 +58,7 @@ DatasetSubTypes, ) from datahub.ingestion.source.sql.sql_config import SQLCommonConfig +from datahub.ingestion.source.sql.sql_report import SQLSourceReport from datahub.ingestion.source.sql.sql_utils import ( add_table_to_schema_container, downgrade_schema_from_v2, @@ -74,7 +74,6 @@ ) from datahub.ingestion.source.state.stale_entity_removal_handler import ( StaleEntityRemovalHandler, - StaleEntityRemovalSourceReport, ) from datahub.ingestion.source.state.stateful_ingestion_base import ( StatefulIngestionSourceBase, @@ -118,9 +117,7 @@ ) from datahub.telemetry import telemetry from datahub.utilities.file_backed_collections import FileBackedDict -from datahub.utilities.lossy_collections import LossyList from datahub.utilities.registries.domain_registry import DomainRegistry -from datahub.utilities.sqlalchemy_query_combiner import SQLAlchemyQueryCombinerReport from datahub.utilities.sqlalchemy_type_converter import ( get_native_data_type_for_sqlalchemy_type, ) @@ -134,43 +131,6 @@ logger: logging.Logger = logging.getLogger(__name__) -@dataclass -class SQLSourceReport(StaleEntityRemovalSourceReport, ClassificationReportMixin): - tables_scanned: int = 0 - views_scanned: int = 0 - entities_profiled: int = 0 - filtered: LossyList[str] = field(default_factory=LossyList) - - query_combiner: Optional[SQLAlchemyQueryCombinerReport] = None - - num_view_definitions_parsed: int = 0 - num_view_definitions_failed_parsing: int = 0 - num_view_definitions_failed_column_parsing: int = 0 - view_definitions_parsing_failures: LossyList[str] = field(default_factory=LossyList) - - def report_entity_scanned(self, name: str, ent_type: str = "table") -> None: - """ - Entity could be a view or a table - """ - if ent_type == "table": - self.tables_scanned += 1 - elif ent_type == "view": - self.views_scanned += 1 - else: - raise KeyError(f"Unknown entity {ent_type}.") - - def report_entity_profiled(self, 
name: str) -> None: - self.entities_profiled += 1 - - def report_dropped(self, ent_name: str) -> None: - self.filtered.append(ent_name) - - def report_from_query_combiner( - self, query_combiner_report: SQLAlchemyQueryCombinerReport - ) -> None: - self.query_combiner = query_combiner_report - - class SqlWorkUnit(MetadataWorkUnit): pass @@ -352,7 +312,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource): def __init__(self, config: SQLCommonConfig, ctx: PipelineContext, platform: str): super().__init__(config, ctx) - self.config = config + self.config: SQLCommonConfig = config self.platform = platform self.report: SQLSourceReport = SQLSourceReport() self.profile_metadata_info: ProfileMetadata = ProfileMetadata() @@ -392,6 +352,7 @@ def __init__(self, config: SQLCommonConfig, ctx: PipelineContext, platform: str) platform_instance=self.config.platform_instance, env=self.config.env, ) + self.discovered_datasets: Set[str] = set() self._view_definition_cache: MutableMapping[str, str] if self.config.use_file_backed_cache: self._view_definition_cache = FileBackedDict[str]() @@ -831,8 +792,9 @@ def _process_table( self._classify(dataset_name, schema, table, data_reader, schema_metadata) dataset_snapshot.aspects.append(schema_metadata) - if self.config.include_view_lineage: + if self._save_schema_to_resolver(): self.schema_resolver.add_schema_metadata(dataset_urn, schema_metadata) + self.discovered_datasets.add(dataset_name) db_name = self.get_db_name(inspector) yield from self.add_table_to_schema_container( @@ -1126,8 +1088,9 @@ def _process_view( columns, canonical_schema=schema_fields, ) - if self.config.include_view_lineage: + if self._save_schema_to_resolver(): self.schema_resolver.add_schema_metadata(dataset_urn, schema_metadata) + self.discovered_datasets.add(dataset_name) description, properties, _ = self.get_table_properties(inspector, schema, view) try: view_definition = inspector.get_view_definition(view, schema) @@ -1190,6 +1153,11 @@ def 
_process_view( domain_registry=self.domain_registry, ) + def _save_schema_to_resolver(self): + return self.config.include_view_lineage or ( + hasattr(self.config, "include_lineage") and self.config.include_lineage + ) + def _run_sql_parser( self, view_identifier: str, query: str, schema_resolver: SchemaResolver ) -> Optional[SqlParsingResult]: @@ -1274,17 +1242,22 @@ def generate_profile_candidates( def is_dataset_eligible_for_profiling( self, dataset_name: str, - sql_config: SQLCommonConfig, + schema: str, inspector: Inspector, profile_candidates: Optional[List[str]], ) -> bool: - return ( - sql_config.table_pattern.allowed(dataset_name) - and sql_config.profile_pattern.allowed(dataset_name) - ) and ( - profile_candidates is None - or (profile_candidates is not None and dataset_name in profile_candidates) - ) + if not ( + self.config.table_pattern.allowed(dataset_name) + and self.config.profile_pattern.allowed(dataset_name) + ): + self.report.profiling_skipped_table_profile_pattern[schema] += 1 + return False + + if profile_candidates is not None and dataset_name not in profile_candidates: + self.report.profiling_skipped_other[schema] += 1 + return False + + return True def loop_profiler_requests( self, @@ -1299,7 +1272,7 @@ def loop_profiler_requests( if ( sql_config.profiling.profile_if_updated_since_days is not None or sql_config.profiling.profile_table_size_limit is not None - or sql_config.profiling.profile_table_row_limit is None + or sql_config.profiling.profile_table_row_limit is not None ): try: threshold_time: Optional[datetime.datetime] = None @@ -1320,8 +1293,9 @@ def loop_profiler_requests( schema=schema, entity=table, inspector=inspector ) if not self.is_dataset_eligible_for_profiling( - dataset_name, sql_config, inspector, profile_candidates + dataset_name, schema, inspector, profile_candidates ): + self.report.num_tables_not_eligible_profiling[schema] += 1 if self.config.profiling.report_dropped_profiles: self.report.report_dropped(f"profile of 
{dataset_name}") continue diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic_profiler.py index 9c8e475e7b307..bd6c23cc2d464 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic_profiler.py @@ -1,6 +1,6 @@ import logging from abc import abstractmethod -from dataclasses import dataclass, field +from dataclasses import dataclass from datetime import datetime, timedelta, timezone from typing import Dict, Iterable, List, Optional, Union, cast @@ -14,42 +14,13 @@ DatahubGEProfiler, GEProfilerRequest, ) -from datahub.ingestion.source.sql.sql_common import SQLSourceReport from datahub.ingestion.source.sql.sql_config import SQLCommonConfig from datahub.ingestion.source.sql.sql_generic import BaseTable, BaseView +from datahub.ingestion.source.sql.sql_report import SQLSourceReport from datahub.ingestion.source.sql.sql_utils import check_table_with_profile_pattern from datahub.ingestion.source.state.profiling_state_handler import ProfilingHandler from datahub.metadata.com.linkedin.pegasus2avro.dataset import DatasetProfile from datahub.metadata.com.linkedin.pegasus2avro.timeseries import PartitionType -from datahub.utilities.stats_collections import TopKDict, int_top_k_dict - - -@dataclass -class DetailedProfilerReportMixin: - profiling_skipped_not_updated: TopKDict[str, int] = field( - default_factory=int_top_k_dict - ) - profiling_skipped_size_limit: TopKDict[str, int] = field( - default_factory=int_top_k_dict - ) - - profiling_skipped_row_limit: TopKDict[str, int] = field( - default_factory=int_top_k_dict - ) - - profiling_skipped_table_profile_pattern: TopKDict[str, int] = field( - default_factory=int_top_k_dict - ) - - profiling_skipped_other: TopKDict[str, int] = field(default_factory=int_top_k_dict) - - num_tables_not_eligible_profiling: Dict[str, int] = field( - 
default_factory=int_top_k_dict - ) - - -class ProfilingSqlReport(DetailedProfilerReportMixin, SQLSourceReport): - pass @dataclass @@ -65,7 +36,7 @@ class GenericProfiler: def __init__( self, config: SQLCommonConfig, - report: ProfilingSqlReport, + report: SQLSourceReport, platform: str, state_handler: Optional[ProfilingHandler] = None, ) -> None: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_report.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_report.py new file mode 100644 index 0000000000000..c1f722b5d1e78 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_report.py @@ -0,0 +1,75 @@ +from dataclasses import dataclass, field +from typing import Dict, Optional + +from datahub.ingestion.glossary.classification_mixin import ClassificationReportMixin +from datahub.ingestion.source.state.stale_entity_removal_handler import ( + StaleEntityRemovalSourceReport, +) +from datahub.utilities.lossy_collections import LossyList +from datahub.utilities.sqlalchemy_query_combiner import SQLAlchemyQueryCombinerReport +from datahub.utilities.stats_collections import TopKDict, int_top_k_dict + + +@dataclass +class DetailedProfilerReportMixin: + profiling_skipped_not_updated: TopKDict[str, int] = field( + default_factory=int_top_k_dict + ) + profiling_skipped_size_limit: TopKDict[str, int] = field( + default_factory=int_top_k_dict + ) + + profiling_skipped_row_limit: TopKDict[str, int] = field( + default_factory=int_top_k_dict + ) + + profiling_skipped_table_profile_pattern: TopKDict[str, int] = field( + default_factory=int_top_k_dict + ) + + profiling_skipped_other: TopKDict[str, int] = field(default_factory=int_top_k_dict) + + num_tables_not_eligible_profiling: Dict[str, int] = field( + default_factory=int_top_k_dict + ) + + +@dataclass +class SQLSourceReport( + StaleEntityRemovalSourceReport, + ClassificationReportMixin, + DetailedProfilerReportMixin, +): + tables_scanned: int = 0 + views_scanned: int = 0 + 
entities_profiled: int = 0 + filtered: LossyList[str] = field(default_factory=LossyList) + + query_combiner: Optional[SQLAlchemyQueryCombinerReport] = None + + num_view_definitions_parsed: int = 0 + num_view_definitions_failed_parsing: int = 0 + num_view_definitions_failed_column_parsing: int = 0 + view_definitions_parsing_failures: LossyList[str] = field(default_factory=LossyList) + + def report_entity_scanned(self, name: str, ent_type: str = "table") -> None: + """ + Entity could be a view or a table + """ + if ent_type == "table": + self.tables_scanned += 1 + elif ent_type == "view": + self.views_scanned += 1 + else: + raise KeyError(f"Unknown entity {ent_type}.") + + def report_entity_profiled(self, name: str) -> None: + self.entities_profiled += 1 + + def report_dropped(self, ent_name: str) -> None: + self.filtered.append(ent_name) + + def report_from_query_combiner( + self, query_combiner_report: SQLAlchemyQueryCombinerReport + ) -> None: + self.query_combiner = query_combiner_report diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py index 53b1ddfcde595..e42564975c3d1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py @@ -44,7 +44,7 @@ from datahub.ingestion.graph.client import DataHubGraph from datahub.ingestion.source.sql.sql_common import SqlWorkUnit, register_custom_type from datahub.ingestion.source.sql.sql_config import SQLCommonConfig -from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport +from datahub.ingestion.source.sql.sql_report import SQLSourceReport from datahub.ingestion.source.sql.two_tier_sql_source import ( TwoTierSQLAlchemyConfig, TwoTierSQLAlchemySource, @@ -330,7 +330,7 @@ def optimized_get_view_definition( @dataclass -class TeradataReport(ProfilingSqlReport, IngestionStageReport, BaseTimeWindowReport): +class 
TeradataReport(SQLSourceReport, IngestionStageReport, BaseTimeWindowReport): num_queries_parsed: int = 0 num_view_ddl_parsed: int = 0 num_table_parse_failures: int = 0 diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py b/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py index a340f049731c4..92487d48b99e6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py @@ -27,7 +27,6 @@ from datahub.ingestion.source.common.data_reader import DataReader from datahub.ingestion.source.sql.sql_common import ( SQLAlchemySource, - SQLSourceReport, SqlWorkUnit, get_schema_metadata, ) @@ -35,6 +34,7 @@ BasicSQLAlchemyConfig, SQLCommonConfig, ) +from datahub.ingestion.source.sql.sql_report import SQLSourceReport from datahub.ingestion.source.sql.sql_utils import get_domain_wu from datahub.metadata.com.linkedin.pegasus2avro.common import StatusClass from datahub.metadata.com.linkedin.pegasus2avro.dataset import UpstreamLineage @@ -536,7 +536,7 @@ def loop_profiler_requests( ) if not self.is_dataset_eligible_for_profiling( - dataset_name, sql_config, inspector, profile_candidates + dataset_name, schema, inspector, profile_candidates ): if self.config.profiling.report_dropped_profiles: self.report.report_dropped(f"profile of {dataset_name}") diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py index 8d6746b6433a4..ac917c5f128ed 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py @@ -643,8 +643,11 @@ def create( cls, d: dict, default_schema_map: Optional[Dict[str, str]] = None ) -> "TableauUpstreamReference": # Values directly from `table` object from Tableau - database = t_database = d.get(c.DATABASE, {}).get(c.NAME) - database_id = 
d.get(c.DATABASE, {}).get(c.ID) + database_dict = ( + d.get(c.DATABASE) or {} + ) # this sometimes is None, so we need the `or {}` + database = t_database = database_dict.get(c.NAME) + database_id = database_dict.get(c.ID) schema = t_schema = d.get(c.SCHEMA) table = t_table = d.get(c.NAME) or "" t_full_name = d.get(c.FULL_NAME) diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/report.py b/metadata-ingestion/src/datahub/ingestion/source/unity/report.py index f4579376a3b3a..f16769341853a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/report.py @@ -2,7 +2,7 @@ from typing import Optional, Tuple from datahub.ingestion.api.report import EntityFilterReport, Report -from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport +from datahub.ingestion.source.sql.sql_report import SQLSourceReport from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport from datahub.utilities.lossy_collections import LossyDict, LossyList from datahub.utilities.perf_timer import PerfTimer @@ -19,7 +19,7 @@ class UnityCatalogUsagePerfReport(Report): @dataclass -class UnityCatalogReport(IngestionStageReport, ProfilingSqlReport): +class UnityCatalogReport(IngestionStageReport, SQLSourceReport): metastores: EntityFilterReport = EntityFilterReport.field(type="metastore") catalogs: EntityFilterReport = EntityFilterReport.field(type="catalog") schemas: EntityFilterReport = EntityFilterReport.field(type="schema") diff --git a/metadata-ingestion/src/datahub/sql_parsing/datajob.py b/metadata-ingestion/src/datahub/sql_parsing/datajob.py new file mode 100644 index 0000000000000..215b207c3dcf5 --- /dev/null +++ b/metadata-ingestion/src/datahub/sql_parsing/datajob.py @@ -0,0 +1,50 @@ +import logging +from typing import Iterable, List, Optional + +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.metadata.schema_classes import ( + 
DataJobInputOutputClass, + FineGrainedLineageClass, + UpstreamLineageClass, +) + +logger = logging.getLogger(__name__) + + +def to_datajob_input_output( + *, mcps: Iterable[MetadataChangeProposalWrapper], ignore_extra_mcps: bool = True +) -> Optional[DataJobInputOutputClass]: + inputDatasets: List[str] = [] + outputDatasets: List[str] = [] + fineGrainedLineages: List[FineGrainedLineageClass] = [] + for mcp in mcps: + # TODO: Represent simple write operations without lineage as outputDatasets. + + upstream_lineage = mcp.as_workunit().get_aspect_of_type(UpstreamLineageClass) + if upstream_lineage is not None: + if mcp.entityUrn and mcp.entityUrn not in outputDatasets: + outputDatasets.append(mcp.entityUrn) + + for upstream in upstream_lineage.upstreams: + if upstream.dataset not in inputDatasets: + inputDatasets.append(upstream.dataset) + + if upstream_lineage.fineGrainedLineages: + for fineGrainedLineage in upstream_lineage.fineGrainedLineages: + fineGrainedLineages.append(fineGrainedLineage) + + elif ignore_extra_mcps: + pass + else: + raise ValueError( + f"Expected an upstreamLineage aspect, got {mcp.aspectName} for {mcp.entityUrn}" + ) + + if not inputDatasets and not outputDatasets: + return None + + return DataJobInputOutputClass( + inputDatasets=inputDatasets, + outputDatasets=outputDatasets, + fineGrainedLineages=fineGrainedLineages, + ) diff --git a/metadata-ingestion/src/datahub/sql_parsing/query_types.py b/metadata-ingestion/src/datahub/sql_parsing/query_types.py index 2acad19418c11..802fb3e993f42 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/query_types.py +++ b/metadata-ingestion/src/datahub/sql_parsing/query_types.py @@ -14,7 +14,16 @@ def _is_temp_table(table: sqlglot.exp.Table, dialect: sqlglot.Dialect) -> bool: identifier: sqlglot.exp.Identifier = table.this return identifier.args.get("temporary") or ( - is_dialect_instance(dialect, "redshift") and identifier.name.startswith("#") + # These dialects use # as a prefix for temp tables. 
+ is_dialect_instance( + dialect, + [ + "redshift", + "mssql", + # sybase is another one, but we don't support that dialect yet. + ], + ) + and identifier.name.startswith("#") ) diff --git a/metadata-ingestion/src/datahub/sql_parsing/split_statements.py b/metadata-ingestion/src/datahub/sql_parsing/split_statements.py new file mode 100644 index 0000000000000..42dda4e62158b --- /dev/null +++ b/metadata-ingestion/src/datahub/sql_parsing/split_statements.py @@ -0,0 +1,163 @@ +import re +from enum import Enum +from typing import Generator, List, Tuple + +CONTROL_FLOW_KEYWORDS = [ + "GO", + r"BEGIN\w+TRY", + r"BEGIN\w+CATCH", + "BEGIN", + r"END\w+TRY", + r"END\w+CATCH", + "END", +] + +# There's an exception to this rule, which is when the statement +# is preceded by a CTE. +FORCE_NEW_STATEMENT_KEYWORDS = [ + # SELECT is used inside queries as well, so we can't include it here. + "INSERT", + "UPDATE", + "DELETE", + "MERGE", +] + + +class ParserState(Enum): + NORMAL = 1 + STRING = 2 + COMMENT = 3 + MULTILINE_COMMENT = 4 + + +def _is_keyword_at_position(sql: str, pos: int, keyword: str) -> bool: + """ + Check if a keyword exists at the given position using regex word boundaries. + """ + if pos + len(keyword) > len(sql): + return False + + # If we're not at a word boundary, we can't generate a keyword. + if pos > 0 and not ( + bool(re.match(r"\w\W", sql[pos - 1 : pos + 1])) + or bool(re.match(r"\W\w", sql[pos - 1 : pos + 1])) + ): + return False + + pattern = rf"^{re.escape(keyword)}\b" + match = re.match(pattern, sql[pos:], re.IGNORECASE) + return bool(match) + + +def _look_ahead_for_keywords( + sql: str, pos: int, keywords: List[str] +) -> Tuple[bool, str, int]: + """ + Look ahead for SQL keywords at the current position. 
+ """ + + for keyword in keywords: + if _is_keyword_at_position(sql, pos, keyword): + return True, keyword, len(keyword) + return False, "", 0 + + +def split_statements(sql: str) -> Generator[str, None, None]: + """ + Split T-SQL code into individual statements, handling various SQL constructs. + """ + if not sql or not sql.strip(): + return + + current_statement: List[str] = [] + state = ParserState.NORMAL + i = 0 + + def yield_if_complete() -> Generator[str, None, None]: + statement = "".join(current_statement).strip() + if statement: + yield statement + current_statement.clear() + + prev_real_char = "\0" # the most recent non-whitespace, non-comment character + while i < len(sql): + c = sql[i] + next_char = sql[i + 1] if i < len(sql) - 1 else "\0" + + if state == ParserState.NORMAL: + if c == "'": + state = ParserState.STRING + current_statement.append(c) + prev_real_char = c + elif c == "-" and next_char == "-": + state = ParserState.COMMENT + current_statement.append(c) + current_statement.append(next_char) + i += 1 + elif c == "/" and next_char == "*": + state = ParserState.MULTILINE_COMMENT + current_statement.append(c) + current_statement.append(next_char) + i += 1 + else: + most_recent_real_char = prev_real_char + if not c.isspace(): + prev_real_char = c + + is_control_keyword, keyword, keyword_len = _look_ahead_for_keywords( + sql, i, keywords=CONTROL_FLOW_KEYWORDS + ) + if is_control_keyword: + # Yield current statement if any + yield from yield_if_complete() + # Yield keyword as its own statement + yield keyword + i += keyword_len + continue + + ( + is_force_new_statement_keyword, + keyword, + keyword_len, + ) = _look_ahead_for_keywords( + sql, i, keywords=FORCE_NEW_STATEMENT_KEYWORDS + ) + if ( + is_force_new_statement_keyword and most_recent_real_char != ")" + ): # usually we'd have a close paren that closes a CTE + # Force termination of current statement + yield from yield_if_complete() + + current_statement.append(keyword) + i += keyword_len + 
continue + + elif c == ";": + yield from yield_if_complete() + else: + current_statement.append(c) + + elif state == ParserState.STRING: + current_statement.append(c) + if c == "'" and next_char == "'": + current_statement.append(next_char) + i += 1 + elif c == "'": + state = ParserState.NORMAL + + elif state == ParserState.COMMENT: + current_statement.append(c) + if c == "\n": + state = ParserState.NORMAL + + elif state == ParserState.MULTILINE_COMMENT: + current_statement.append(c) + if c == "*" and next_char == "/": + current_statement.append(next_char) + i += 1 + state = ParserState.NORMAL + + i += 1 + + # Handle the last statement + yield from yield_if_complete() diff --git a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py index 360ccd7bf3507..44f0d7be7aad9 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py @@ -762,7 +762,6 @@ def add_observed_query( This assumes that queries come in order of increasing timestamps. """ - self.report.num_observed_queries += 1 # All queries with no session ID are assumed to be part of the same session. 
diff --git a/metadata-ingestion/tests/integration/sigma/golden_test_platform_instance_ingest.json b/metadata-ingestion/tests/integration/sigma/golden_test_platform_instance_ingest.json index 12bb7734f30a6..645e710309b0d 100644 --- a/metadata-ingestion/tests/integration/sigma/golden_test_platform_instance_ingest.json +++ b/metadata-ingestion/tests/integration/sigma/golden_test_platform_instance_ingest.json @@ -10,23 +10,7 @@ } }, "systemMetadata": { - "lastObserved": 1713794496006, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,cloud_instance.49HFLTr6xytgrPly3PFsNC,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d" - } - }, - "systemMetadata": { - "lastObserved": 1713794496008, + "lastObserved": 1732608523763, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -56,7 +40,7 @@ } }, "systemMetadata": { - "lastObserved": 1713794496007, + "lastObserved": 1732608523764, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -65,40 +49,30 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,cloud_instance.49HFLTr6xytgrPly3PFsNC,PROD)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "container", "aspect": { "json": { - "platform": "urn:li:dataPlatform:sigma", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)" + "container": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d" } }, "systemMetadata": { - "lastObserved": 1713794496008, + "lastObserved": 1732608523764, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,cloud_instance.49HFLTr6xytgrPly3PFsNC,PROD)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,cloud_instance.Ml9C5ezT5W)", 
"changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)" - }, - { - "id": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d", - "urn": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d" - } - ] + "removed": false } }, "systemMetadata": { - "lastObserved": 1713794496010, + "lastObserved": 1732608523833, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -107,16 +81,15 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,cloud_instance.49HFLTr6xytgrPly3PFsNC,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "typeNames": [ - "Sigma Dataset" - ] + "platform": "urn:li:dataPlatform:sigma", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)" } }, "systemMetadata": { - "lastObserved": 1713794496009, + "lastObserved": 1732608523764, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -125,40 +98,44 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,cloud_instance.49HFLTr6xytgrPly3PFsNC,PROD)", "changeType": "UPSERT", - "aspectName": "ownership", + "aspectName": "subTypes", "aspect": { "json": { - "owners": [ - { - "owner": "urn:li:corpuser:Shubham_Jagtap", - "type": "DATAOWNER" - } - ], - "ownerTypes": {}, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } + "typeNames": [ + "Sigma Dataset" + ] } }, "systemMetadata": { - "lastObserved": 1713794496009, + "lastObserved": 1732608523765, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,cloud_instance.5LqGLu14qUnqh3cN6wRJBd,PROD)", + "entityType": "chart", + "entityUrn": 
"urn:li:chart:(sigma,cloud_instance.Ml9C5ezT5W)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePathsV2", "aspect": { "json": { - "removed": false + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)" + }, + { + "id": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d", + "urn": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d" + }, + { + "id": "Acryl Workbook" + } + ] } }, "systemMetadata": { - "lastObserved": 1713794496011, + "lastObserved": 1732608523835, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -167,14 +144,14 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,cloud_instance.5LqGLu14qUnqh3cN6wRJBd,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "status", "aspect": { "json": { - "container": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d" + "removed": false } }, "systemMetadata": { - "lastObserved": 1713794496012, + "lastObserved": 1732608523781, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -206,33 +183,7 @@ } }, "systemMetadata": { - "lastObserved": 1713794496012, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d", - "changeType": "UPSERT", - "aspectName": "ownership", - "aspect": { - "json": { - "owners": [ - { - "owner": "urn:li:corpuser:Shubham_Jagtap", - "type": "DATAOWNER" - } - ], - "ownerTypes": {}, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, - "systemMetadata": { - "lastObserved": 1713794496201, + "lastObserved": 1732608523781, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -241,49 +192,26 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,cloud_instance.5LqGLu14qUnqh3cN6wRJBd,PROD)", "changeType": 
"UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:sigma", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)" - } - }, - "systemMetadata": { - "lastObserved": 1713794496013, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d", - "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "container", "aspect": { "json": { - "typeNames": [ - "Sigma Workspace" - ] + "container": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d" } }, "systemMetadata": { - "lastObserved": 1713794496200, + "lastObserved": 1732608523782, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,cloud_instance.5LqGLu14qUnqh3cN6wRJBd,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,cloud_instance.49HFLTr6xytgrPly3PFsNC,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)" - }, { "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)", "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)" @@ -291,22 +219,19 @@ { "id": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d", "urn": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d" - }, - { - "id": "New Folder" } ] } }, "systemMetadata": { - "lastObserved": 1713794496015, + "lastObserved": 1732608523765, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d", + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:sigma,cloud_instance.5LqGLu14qUnqh3cN6wRJBd,PROD)", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -316,7 +241,7 @@ } }, "systemMetadata": { - "lastObserved": 1713794496200, + "lastObserved": 1732608523782, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -334,34 +259,51 @@ } }, "systemMetadata": { - "lastObserved": 1713794496014, + "lastObserved": 1732608523783, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,cloud_instance.5LqGLu14qUnqh3cN6wRJBd,PROD)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,cloud_instance.Ml9C5ezT5W)", "changeType": "UPSERT", - "aspectName": "globalTags", + "aspectName": "chartInfo", "aspect": { "json": { - "tags": [ + "customProperties": { + "VizualizationType": "bar", + "type": "visualization" + }, + "externalUrl": "https://app.sigmacomputing.com/acryldata/workbook/4JRFW1HThPI1K3YTjouXI7?:nodeId=Ml9C5ezT5W&:fullScreen=true", + "title": "Count of Profile Id by Status", + "description": "", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ { - "tag": "urn:li:tag:Deprecated" + "string": "urn:li:dataset:(urn:li:dataPlatform:sigma,cloud_instance.49HFLTr6xytgrPly3PFsNC,PROD)" } ] } }, "systemMetadata": { - "lastObserved": 1713794496015, + "lastObserved": 1732608523833, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,cloud_instance.5LqGLu14qUnqh3cN6wRJBd,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,cloud_instance.49HFLTr6xytgrPly3PFsNC,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -380,14 +322,14 @@ } }, "systemMetadata": { - "lastObserved": 1713794496014, + "lastObserved": 
1732608523765, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,cloud_instance.9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -396,57 +338,76 @@ } }, "systemMetadata": { - "lastObserved": 1713794496199, + "lastObserved": 1732608523784, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,cloud_instance.9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b)", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "dashboardInfo", "aspect": { "json": { "customProperties": { - "platform": "sigma", - "instance": "cloud_instance", - "workspaceId": "3ee61405-3be2-4000-ba72-60d36757b95b" - }, - "name": "Acryl Data", - "created": { - "time": 1710232264826 + "path": "Acryl Data", + "latestVersion": "2" }, + "externalUrl": "https://app.sigmacomputing.com/acryldata/workbook/4JRFW1HThPI1K3YTjouXI7", + "title": "Acryl Workbook", + "description": "", + "charts": [], + "datasets": [], + "dashboards": [ + { + "sourceUrn": "urn:li:dashboard:(sigma,cloud_instance.9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b)", + "destinationUrn": "urn:li:dashboard:(sigma,cloud_instance.OSnGLBzL1i)" + }, + { + "sourceUrn": "urn:li:dashboard:(sigma,cloud_instance.9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b)", + "destinationUrn": "urn:li:dashboard:(sigma,cloud_instance.DFSieiAcgo)" + } + ], "lastModified": { - "time": 1710232264826 + "created": { + "time": 1713188691477, + "actor": "urn:li:corpuser:datahub" + }, + "lastModified": { + "time": 1713189117302, + "actor": "urn:li:corpuser:datahub" + } } } }, "systemMetadata": { - "lastObserved": 1713794496199, + "lastObserved": 1732608523785, "runId": 
"sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(sigma,cloud_instance.kH0MeihtGs)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,cloud_instance.9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "Sigma Workbook" + ] } }, "systemMetadata": { - "lastObserved": 1713794496053, + "lastObserved": 1732608523785, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,cloud_instance.5LqGLu14qUnqh3cN6wRJBd,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -455,227 +416,81 @@ { "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)", "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)" + }, + { + "id": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d", + "urn": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d" + }, + { + "id": "New Folder" } ] } }, "systemMetadata": { - "lastObserved": 1713794496202, + "lastObserved": 1732608523783, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(sigma,cloud_instance.kH0MeihtGs)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,cloud_instance.9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "ownership", "aspect": { "json": { - "container": "urn:li:container:084a2e283eddfc576ce70989b395a7d8" + "owners": [ + { + "owner": "urn:li:corpuser:Shubham_Jagtap", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } } }, "systemMetadata": { - "lastObserved": 
1713794496055, + "lastObserved": 1732608523786, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(sigma,cloud_instance.kH0MeihtGs)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,cloud_instance.9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b)", "changeType": "UPSERT", - "aspectName": "inputFields", + "aspectName": "globalTags", "aspect": { "json": { - "fields": [ + "tags": [ { - "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,cloud_instance.kH0MeihtGs),Pk)", - "schemaField": { - "fieldPath": "Pk", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "String", - "recursive": false, - "isPartOfKey": false - } - }, - { - "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,cloud_instance.kH0MeihtGs),Pet Fk)", - "schemaField": { - "fieldPath": "Pet Fk", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "String", - "recursive": false, - "isPartOfKey": false - } - }, - { - "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,cloud_instance.kH0MeihtGs),Human Fk)", - "schemaField": { - "fieldPath": "Human Fk", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "String", - "recursive": false, - "isPartOfKey": false - } - }, - { - "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,cloud_instance.kH0MeihtGs),Status)", - "schemaField": { - "fieldPath": "Status", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "String", - "recursive": false, - "isPartOfKey": false - } - }, - { - "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,cloud_instance.kH0MeihtGs),Created At)", - "schemaField": { - "fieldPath": "Created At", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - 
"nativeDataType": "String", - "recursive": false, - "isPartOfKey": false - } - }, - { - "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,cloud_instance.kH0MeihtGs),Updated At)", - "schemaField": { - "fieldPath": "Updated At", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "String", - "recursive": false, - "isPartOfKey": false - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1713794496055, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(sigma,cloud_instance.kH0MeihtGs)", - "changeType": "UPSERT", - "aspectName": "chartInfo", - "aspect": { - "json": { - "customProperties": { - "VizualizationType": "levelTable", - "type": "table" - }, - "externalUrl": "https://app.sigmacomputing.com/acryldata/workbook/4JRFW1HThPI1K3YTjouXI7?:nodeId=kH0MeihtGs&:fullScreen=true", - "title": "ADOPTIONS", - "description": "", - "lastModified": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - }, - "inputs": [ - { - "string": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev_instance.long_tail_companions.adoption.adoptions,DEV)" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1718348049212, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(sigma,cloud_instance.kH0MeihtGs)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)" - }, - { - "id": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d", - "urn": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d" - }, - { - "id": "urn:li:container:084a2e283eddfc576ce70989b395a7d8", - "urn": 
"urn:li:container:084a2e283eddfc576ce70989b395a7d8" + "tag": "urn:li:tag:Warning" } ] } }, "systemMetadata": { - "lastObserved": 1713794496058, + "lastObserved": 1732608523786, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:084a2e283eddfc576ce70989b395a7d8", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,cloud_instance.9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b)", "changeType": "UPSERT", - "aspectName": "ownership", + "aspectName": "container", "aspect": { "json": { - "owners": [ - { - "owner": "urn:li:corpuser:Shubham_Jagtap", - "type": "DATAOWNER" - } - ], - "ownerTypes": {}, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } + "container": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d" } }, "systemMetadata": { - "lastObserved": 1713794496019, + "lastObserved": 1732608523786, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -691,7 +506,7 @@ } }, "systemMetadata": { - "lastObserved": 1713794496021, + "lastObserved": 1732608523787, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -713,6 +528,7 @@ "urn:li:chart:(sigma,cloud_instance.Ml9C5ezT5W)" ], "datasets": [], + "dashboards": [], "lastModified": { "created": { "time": 0, @@ -726,87 +542,7 @@ } }, "systemMetadata": { - "lastObserved": 1713794496022, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:084a2e283eddfc576ce70989b395a7d8", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "sigma", - "instance": "cloud_instance", - "workbookId": "9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b", - "path": "Acryl Data", - "latestVersion": "2" - }, - "externalUrl": "https://app.sigmacomputing.com/acryldata/workbook/4JRFW1HThPI1K3YTjouXI7", - "name": "Acryl Workbook", - "created": { - "time": 1713188691477 - }, - "lastModified": 
{ - "time": 1713189117302 - } - } - }, - "systemMetadata": { - "lastObserved": 1713794496016, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(sigma,cloud_instance.OSnGLBzL1i)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:084a2e283eddfc576ce70989b395a7d8" - } - }, - "systemMetadata": { - "lastObserved": 1713794496023, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:084a2e283eddfc576ce70989b395a7d8", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Sigma Workbook" - ] - } - }, - "systemMetadata": { - "lastObserved": 1713794496018, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:084a2e283eddfc576ce70989b395a7d8", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1713794496017, + "lastObserved": 1732608523788, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -823,7 +559,7 @@ } }, "systemMetadata": { - "lastObserved": 1713794496023, + "lastObserved": 1732608523788, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -845,108 +581,87 @@ "urn": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d" }, { - "id": "urn:li:container:084a2e283eddfc576ce70989b395a7d8", - "urn": "urn:li:container:084a2e283eddfc576ce70989b395a7d8" + "id": "Acryl Workbook" } ] } }, "systemMetadata": { - "lastObserved": 1713794496024, + "lastObserved": 1732608523788, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:084a2e283eddfc576ce70989b395a7d8", + "entityType": "dashboard", + "entityUrn": 
"urn:li:dashboard:(sigma,cloud_instance.9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b)", "changeType": "UPSERT", - "aspectName": "globalTags", + "aspectName": "browsePathsV2", "aspect": { "json": { - "tags": [ + "path": [ { - "tag": "urn:li:tag:Warning" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)" + }, + { + "id": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d", + "urn": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d" + }, + { + "id": "Acryl Workbook" } ] } }, "systemMetadata": { - "lastObserved": 1713794496019, + "lastObserved": 1732608523787, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:084a2e283eddfc576ce70989b395a7d8", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:sigma", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)" - } - }, - "systemMetadata": { - "lastObserved": 1713794496018, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(sigma,cloud_instance.Ml9C5ezT5W)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1713794496114, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:084a2e283eddfc576ce70989b395a7d8", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,cloud_instance.5LqGLu14qUnqh3cN6wRJBd,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "globalTags", "aspect": { "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)", - "urn": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)" - }, + "tags": [ { - "id": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d", - "urn": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d" + "tag": "urn:li:tag:Deprecated" } ] } }, "systemMetadata": { - "lastObserved": 1713794496020, + "lastObserved": 1732608523783, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(sigma,cloud_instance.Ml9C5ezT5W)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,cloud_instance.5LqGLu14qUnqh3cN6wRJBd,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "ownership", "aspect": { "json": { - "container": "urn:li:container:084a2e283eddfc576ce70989b395a7d8" + "owners": [ + { + "owner": "urn:li:corpuser:Shubham_Jagtap", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } } }, "systemMetadata": { - "lastObserved": 1713794496116, + "lastObserved": 1732608523782, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -1047,208 +762,20 @@ "nativeDataType": "String", "recursive": false, "isPartOfKey": false - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1713794496117, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(sigma,cloud_instance.Ml9C5ezT5W)", - "changeType": "UPSERT", - "aspectName": "chartInfo", - "aspect": { - "json": { - "customProperties": { - "VizualizationType": "bar", - "type": "visualization" - }, - "externalUrl": "https://app.sigmacomputing.com/acryldata/workbook/4JRFW1HThPI1K3YTjouXI7?:nodeId=Ml9C5ezT5W&:fullScreen=true", - "title": "Count of Profile Id by Status", - "description": "", - "lastModified": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - }, - 
"inputs": [ - { - "string": "urn:li:dataset:(urn:li:dataPlatform:sigma,cloud_instance.49HFLTr6xytgrPly3PFsNC,PROD)" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1718348049268, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(sigma,cloud_instance.Ml9C5ezT5W)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)" - }, - { - "id": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d", - "urn": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d" - }, - { - "id": "urn:li:container:084a2e283eddfc576ce70989b395a7d8", - "urn": "urn:li:container:084a2e283eddfc576ce70989b395a7d8" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1713794496119, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(sigma,cloud_instance.DFSieiAcgo)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1713794496124, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(sigma,cloud_instance.DFSieiAcgo)", - "changeType": "UPSERT", - "aspectName": "dashboardInfo", - "aspect": { - "json": { - "customProperties": { - "ElementsCount": "1" - }, - "title": "Page 2", - "description": "", - "charts": [ - "urn:li:chart:(sigma,cloud_instance.tQJu5N1l81)" - ], - "datasets": [], - "lastModified": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - } - }, - "systemMetadata": { - "lastObserved": 1713794496125, - "runId": "sigma-test", - "lastRunId": 
"no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:084a2e283eddfc576ce70989b395a7d8", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d" - } - }, - "systemMetadata": { - "lastObserved": 1713794496020, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(sigma,cloud_instance.DFSieiAcgo)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:084a2e283eddfc576ce70989b395a7d8" - } - }, - "systemMetadata": { - "lastObserved": 1713794496125, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(sigma,cloud_instance.tQJu5N1l81)", - "changeType": "UPSERT", - "aspectName": "chartInfo", - "aspect": { - "json": { - "customProperties": { - "VizualizationType": "levelTable", - "type": "table" - }, - "externalUrl": "https://app.sigmacomputing.com/acryldata/workbook/4JRFW1HThPI1K3YTjouXI7?:nodeId=tQJu5N1l81&:fullScreen=true", - "title": "PETS ADOPTIONS JOIN", - "description": "", - "lastModified": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - }, - "inputs": [ - { - "string": "urn:li:dataset:(urn:li:dataPlatform:sigma,cloud_instance.49HFLTr6xytgrPly3PFsNC,PROD)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev_instance.long_tail_companions.adoption.adoptions,DEV)" + } } ] } }, "systemMetadata": { - "lastObserved": 1718348049351, + "lastObserved": 1732608523834, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { "entityType": "chart", - "entityUrn": "urn:li:chart:(sigma,cloud_instance.tQJu5N1l81)", + "entityUrn": "urn:li:chart:(sigma,cloud_instance.kH0MeihtGs)", "changeType": "UPSERT", 
"aspectName": "status", "aspect": { @@ -1257,69 +784,179 @@ } }, "systemMetadata": { - "lastObserved": 1713794496188, + "lastObserved": 1732608523803, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(sigma,cloud_instance.DFSieiAcgo)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,cloud_instance.kH0MeihtGs)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "browsePathsV2", "aspect": { "json": { - "platform": "urn:li:dataPlatform:sigma", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)" + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)" + }, + { + "id": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d", + "urn": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d" + }, + { + "id": "Acryl Workbook" + } + ] } }, "systemMetadata": { - "lastObserved": 1713794496126, + "lastObserved": 1732608523806, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(sigma,cloud_instance.DFSieiAcgo)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,cloud_instance.kH0MeihtGs)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "chartInfo", "aspect": { "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)" - }, - { - "id": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d", - "urn": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d" + "customProperties": { + "VizualizationType": "levelTable", + "type": "table" + }, + "externalUrl": "https://app.sigmacomputing.com/acryldata/workbook/4JRFW1HThPI1K3YTjouXI7?:nodeId=kH0MeihtGs&:fullScreen=true", + "title": 
"ADOPTIONS", + "description": "", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ { - "id": "urn:li:container:084a2e283eddfc576ce70989b395a7d8", - "urn": "urn:li:container:084a2e283eddfc576ce70989b395a7d8" + "string": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev_instance.long_tail_companions.adoption.adoptions,DEV)" } ] } }, "systemMetadata": { - "lastObserved": 1713794496126, + "lastObserved": 1732608523804, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { "entityType": "chart", - "entityUrn": "urn:li:chart:(sigma,cloud_instance.tQJu5N1l81)", + "entityUrn": "urn:li:chart:(sigma,cloud_instance.kH0MeihtGs)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "inputFields", "aspect": { "json": { - "container": "urn:li:container:084a2e283eddfc576ce70989b395a7d8" + "fields": [ + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,cloud_instance.kH0MeihtGs),Pk)", + "schemaField": { + "fieldPath": "Pk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,cloud_instance.kH0MeihtGs),Pet Fk)", + "schemaField": { + "fieldPath": "Pet Fk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,cloud_instance.kH0MeihtGs),Human Fk)", + "schemaField": { + "fieldPath": "Human Fk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": 
"urn:li:schemaField:(urn:li:chart:(sigma,cloud_instance.kH0MeihtGs),Status)", + "schemaField": { + "fieldPath": "Status", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,cloud_instance.kH0MeihtGs),Created At)", + "schemaField": { + "fieldPath": "Created At", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,cloud_instance.kH0MeihtGs),Updated At)", + "schemaField": { + "fieldPath": "Updated At", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + } + ] } }, "systemMetadata": { - "lastObserved": 1713794496189, + "lastObserved": 1732608523804, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -1483,40 +1120,219 @@ } }, { - "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,cloud_instance.Ml9C5ezT5W),Updated At)", - "schemaField": { - "fieldPath": "Updated At", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "String", - "recursive": false, - "isPartOfKey": false - } + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,cloud_instance.Ml9C5ezT5W),Updated At)", + "schemaField": { + "fieldPath": "Updated At", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,cloud_instance.Ml9C5ezT5W),Count of Profile Id)", + "schemaField": { + "fieldPath": "Count of Profile Id", + "nullable": false, + "type": { 
+ "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1732608523836, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,cloud_instance.DFSieiAcgo)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1732608523838, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,cloud_instance.DFSieiAcgo)", + "changeType": "UPSERT", + "aspectName": "dashboardInfo", + "aspect": { + "json": { + "customProperties": { + "ElementsCount": "1" + }, + "title": "Page 2", + "description": "", + "charts": [ + "urn:li:chart:(sigma,cloud_instance.tQJu5N1l81)" + ], + "datasets": [], + "dashboards": [], + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + }, + "systemMetadata": { + "lastObserved": 1732608523838, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,cloud_instance.DFSieiAcgo)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:sigma", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)" + } + }, + "systemMetadata": { + "lastObserved": 1732608523839, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,cloud_instance.DFSieiAcgo)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)" + }, + { + "id": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d", + "urn": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d" }, { - "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,cloud_instance.Ml9C5ezT5W),Count of Profile Id)", - "schemaField": { - "fieldPath": "Count of Profile Id", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "String", - "recursive": false, - "isPartOfKey": false - } + "id": "Acryl Workbook" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1732608523839, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,cloud_instance.49HFLTr6xytgrPly3PFsNC,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev_instance.long_tail_companions.adoption.pets,DEV)", + "type": "COPY" } ] } }, "systemMetadata": { - "lastObserved": 1713794496120, + "lastObserved": 1732608523874, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Deprecated", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "Deprecated" + } + }, + "systemMetadata": { + "lastObserved": 1732608523874, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1732608523872, + "runId": "sigma-test", + 
"lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:Shubham_Jagtap", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1732608523873, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -1698,7 +1514,7 @@ } }, "systemMetadata": { - "lastObserved": 1713794496194, + "lastObserved": 1732608523870, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -1707,27 +1523,54 @@ "entityType": "chart", "entityUrn": "urn:li:chart:(sigma,cloud_instance.tQJu5N1l81)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)" + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1732608523866, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,cloud_instance.tQJu5N1l81)", + "changeType": "UPSERT", + "aspectName": "chartInfo", + "aspect": { + "json": { + "customProperties": { + "VizualizationType": "levelTable", + "type": "table" + }, + "externalUrl": "https://app.sigmacomputing.com/acryldata/workbook/4JRFW1HThPI1K3YTjouXI7?:nodeId=tQJu5N1l81&:fullScreen=true", + "title": "PETS ADOPTIONS JOIN", + "description": "", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ { - "id": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d", - "urn": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d" 
+ "string": "urn:li:dataset:(urn:li:dataPlatform:sigma,cloud_instance.49HFLTr6xytgrPly3PFsNC,PROD)" }, { - "id": "urn:li:container:084a2e283eddfc576ce70989b395a7d8", - "urn": "urn:li:container:084a2e283eddfc576ce70989b395a7d8" + "string": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev_instance.long_tail_companions.adoption.adoptions,DEV)" } ] } }, "systemMetadata": { - "lastObserved": 1713794496194, + "lastObserved": 1732608523866, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -1909,64 +1752,134 @@ } }, "systemMetadata": { - "lastObserved": 1713794496190, + "lastObserved": 1732608523867, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,cloud_instance.49HFLTr6xytgrPly3PFsNC,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "upstreams": [ + "platform": "urn:li:dataPlatform:sigma", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)" + } + }, + "systemMetadata": { + "lastObserved": 1732608523873, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev_instance.long_tail_companions.adoption.pets,DEV)", - "type": "COPY" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)" } ] } }, "systemMetadata": { - "lastObserved": 1718348049380, + "lastObserved": 1732608523874, "runId": "sigma-test", 
"lastRunId": "no-run-id-provided" } }, { - "entityType": "tag", - "entityUrn": "urn:li:tag:Warning", + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,cloud_instance.tQJu5N1l81)", "changeType": "UPSERT", - "aspectName": "tagKey", + "aspectName": "browsePathsV2", "aspect": { "json": { - "name": "Warning" + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:sigma,cloud_instance)" + }, + { + "id": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d", + "urn": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d" + }, + { + "id": "Acryl Workbook" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1732608523869, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "sigma", + "instance": "cloud_instance", + "workspaceId": "3ee61405-3be2-4000-ba72-60d36757b95b" + }, + "name": "Acryl Data", + "created": { + "time": 1710232264826 + }, + "lastModified": { + "time": 1710232264826 + } + } + }, + "systemMetadata": { + "lastObserved": 1732608523872, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:abbebb5181bf9ba2d905d2dea7d8704d", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Sigma Workspace" + ] } }, "systemMetadata": { - "lastObserved": 1713794496203, + "lastObserved": 1732608523873, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { "entityType": "tag", - "entityUrn": "urn:li:tag:Deprecated", + "entityUrn": "urn:li:tag:Warning", "changeType": "UPSERT", "aspectName": "tagKey", "aspect": { "json": { - "name": "Deprecated" + "name": "Warning" } }, "systemMetadata": { - 
"lastObserved": 1713794496203, + "lastObserved": 1732608523875, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } diff --git a/metadata-ingestion/tests/integration/sigma/golden_test_sigma_ingest.json b/metadata-ingestion/tests/integration/sigma/golden_test_sigma_ingest.json index f800cb19f8811..bb37e7029330b 100644 --- a/metadata-ingestion/tests/integration/sigma/golden_test_sigma_ingest.json +++ b/metadata-ingestion/tests/integration/sigma/golden_test_sigma_ingest.json @@ -261,37 +261,8 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "sigma", - "workbookId": "9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b", - "path": "Acryl Data", - "latestVersion": "2" - }, - "externalUrl": "https://app.sigmacomputing.com/acryldata/workbook/4JRFW1HThPI1K3YTjouXI7", - "name": "Acryl Workbook", - "created": { - "time": 1713188691477 - }, - "lastModified": { - "time": 1713189117302 - } - } - }, - "systemMetadata": { - "lastObserved": 1713795619227, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -300,7 +271,7 @@ } }, "systemMetadata": { - "lastObserved": 1713795619228, + "lastObserved": 1732513099680, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -338,32 +309,70 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,5LqGLu14qUnqh3cN6wRJBd,PROD)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "subTypes", "aspect": { "json": 
{ - "path": [ + "typeNames": [ + "Sigma Workbook" + ] + } + }, + "systemMetadata": { + "lastObserved": 1732513099681, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b)", + "changeType": "UPSERT", + "aspectName": "dashboardInfo", + "aspect": { + "json": { + "customProperties": { + "path": "Acryl Data", + "latestVersion": "2" + }, + "externalUrl": "https://app.sigmacomputing.com/acryldata/workbook/4JRFW1HThPI1K3YTjouXI7", + "title": "Acryl Workbook", + "description": "", + "charts": [], + "datasets": [], + "dashboards": [ { - "id": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", - "urn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" + "sourceUrn": "urn:li:dashboard:(sigma,9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b)", + "destinationUrn": "urn:li:dashboard:(sigma,OSnGLBzL1i)" }, { - "id": "New Folder" + "sourceUrn": "urn:li:dashboard:(sigma,9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b)", + "destinationUrn": "urn:li:dashboard:(sigma,DFSieiAcgo)" } - ] + ], + "lastModified": { + "created": { + "time": 1713188691477, + "actor": "urn:li:corpuser:datahub" + }, + "lastModified": { + "time": 1713189117302, + "actor": "urn:li:corpuser:datahub" + } + } } }, "systemMetadata": { - "lastObserved": 1713795619226, + "lastObserved": 1732535135915, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -382,41 +391,51 @@ } }, "systemMetadata": { - "lastObserved": 1713795619229, + "lastObserved": 1732513099681, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "entityType": 
"dashboard", + "entityUrn": "urn:li:dashboard:(sigma,9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "globalTags", "aspect": { "json": { - "typeNames": [ - "Sigma Workbook" + "tags": [ + { + "tag": "urn:li:tag:Warning" + } ] } }, "systemMetadata": { - "lastObserved": 1713795619229, + "lastObserved": 1732513099682, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,5LqGLu14qUnqh3cN6wRJBd,PROD)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "browsePathsV2", "aspect": { "json": { - "platform": "urn:li:dataPlatform:sigma" + "path": [ + { + "id": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", + "urn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" + }, + { + "id": "New Folder" + } + ] } }, "systemMetadata": { - "lastObserved": 1713795619228, + "lastObserved": 1713795619226, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -457,79 +476,62 @@ "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(sigma,OSnGLBzL1i)", "changeType": "UPSERT", - "aspectName": "dashboardInfo", + "aspectName": "browsePathsV2", "aspect": { "json": { - "customProperties": { - "ElementsCount": "2" - }, - "title": "Page 1", - "description": "", - "charts": [ - "urn:li:chart:(sigma,kH0MeihtGs)", - "urn:li:chart:(sigma,Ml9C5ezT5W)" - ], - "datasets": [], - "lastModified": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" + "path": [ + { + "id": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", + "urn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" + { + "id": "Acryl Workbook" } - } + ] } }, "systemMetadata": { - "lastObserved": 1713795619233, + "lastObserved": 1732545848809, "runId": 
"sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,5LqGLu14qUnqh3cN6wRJBd,PROD)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b)", "changeType": "UPSERT", - "aspectName": "ownership", + "aspectName": "container", "aspect": { "json": { - "owners": [ - { - "owner": "urn:li:corpuser:Shubham_Jagtap", - "type": "DATAOWNER" - } - ], - "ownerTypes": {}, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } + "container": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" } }, "systemMetadata": { - "lastObserved": 1713795619224, + "lastObserved": 1732513099682, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b)", "changeType": "UPSERT", - "aspectName": "globalTags", + "aspectName": "browsePathsV2", "aspect": { "json": { - "tags": [ + "path": [ { - "tag": "urn:li:tag:Warning" + "id": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", + "urn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" + }, + { + "id": "Acryl Workbook" } ] } }, "systemMetadata": { - "lastObserved": 1713795619230, + "lastObserved": 1732545848807, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -538,14 +540,34 @@ "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(sigma,OSnGLBzL1i)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "dashboardInfo", "aspect": { "json": { - "container": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + "customProperties": { + "ElementsCount": "2" + }, + "title": "Page 1", + "description": "", + "charts": [ + "urn:li:chart:(sigma,kH0MeihtGs)", + "urn:li:chart:(sigma,Ml9C5ezT5W)" + ], + "datasets": [], + "dashboards": [], + "lastModified": { + 
"created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } } }, "systemMetadata": { - "lastObserved": 1713795619234, + "lastObserved": 1713795619233, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -554,59 +576,44 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,5LqGLu14qUnqh3cN6wRJBd,PROD)", "changeType": "UPSERT", - "aspectName": "globalTags", + "aspectName": "ownership", "aspect": { "json": { - "tags": [ + "owners": [ { - "tag": "urn:li:tag:Deprecated" + "owner": "urn:li:corpuser:Shubham_Jagtap", + "type": "DATAOWNER" } - ] + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } } }, "systemMetadata": { - "lastObserved": 1713795619226, + "lastObserved": 1713795619224, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(sigma,OSnGLBzL1i)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,5LqGLu14qUnqh3cN6wRJBd,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "globalTags", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", - "urn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" - }, + "tags": [ { - "id": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", - "urn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + "tag": "urn:li:tag:Deprecated" } ] } }, "systemMetadata": { - "lastObserved": 1713795619234, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" - } - }, - "systemMetadata": { - "lastObserved": 1713795619231, + 
"lastObserved": 1713795619226, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -642,51 +649,13 @@ "urn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" }, { - "id": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", - "urn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1713795619375, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", - "urn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" + "id": "Acryl Workbook" } ] } }, "systemMetadata": { - "lastObserved": 1713795619231, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(sigma,Ml9C5ezT5W)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" - } - }, - "systemMetadata": { - "lastObserved": 1713795619373, + "lastObserved": 1732545848872, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -883,6 +852,7 @@ "urn:li:chart:(sigma,tQJu5N1l81)" ], "datasets": [], + "dashboards": [], "lastModified": { "created": { "time": 0, @@ -901,6 +871,30 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,DFSieiAcgo)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", + "urn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" + }, + { + "id": "Acryl Workbook" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1732545848877, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": 
"chart", "entityUrn": "urn:li:chart:(sigma,kH0MeihtGs)", @@ -914,14 +908,13 @@ "urn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" }, { - "id": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", - "urn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + "id": "Acryl Workbook" } ] } }, "systemMetadata": { - "lastObserved": 1713795619270, + "lastObserved": 1732545848829, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -1179,54 +1172,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(sigma,DFSieiAcgo)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" - } - }, - "systemMetadata": { - "lastObserved": 1713795619382, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(sigma,kH0MeihtGs)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" - } - }, - "systemMetadata": { - "lastObserved": 1713795619267, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(sigma,tQJu5N1l81)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" - } - }, - "systemMetadata": { - "lastObserved": 1713795619449, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "chart", "entityUrn": "urn:li:chart:(sigma,tQJu5N1l81)", @@ -1409,31 +1354,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(sigma,DFSieiAcgo)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", - "urn": 
"urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" - }, - { - "id": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", - "urn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1713795619383, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "chart", "entityUrn": "urn:li:chart:(sigma,kH0MeihtGs)", @@ -1554,14 +1474,13 @@ "urn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" }, { - "id": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", - "urn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + "id": "Acryl Workbook" } ] } }, "systemMetadata": { - "lastObserved": 1713795619453, + "lastObserved": 1732545848921, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } diff --git a/metadata-ingestion/tests/integration/sigma/golden_test_sigma_ingest_shared_entities_mces.json b/metadata-ingestion/tests/integration/sigma/golden_test_sigma_ingest_shared_entities_mces.json index d6b702bdfd669..1ce671f09d776 100644 --- a/metadata-ingestion/tests/integration/sigma/golden_test_sigma_ingest_shared_entities_mces.json +++ b/metadata-ingestion/tests/integration/sigma/golden_test_sigma_ingest_shared_entities_mces.json @@ -279,37 +279,24 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b)", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "status", "aspect": { "json": { - "customProperties": { - "platform": "sigma", - "workbookId": "9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b", - "path": "New Acryl Data", - "latestVersion": "2" - }, - "externalUrl": "https://app.sigmacomputing.com/acryldata/workbook/4JRFW1HThPI1K3YTjouXI7", - "name": "Acryl Workbook", - "created": { - "time": 1713188691477 - }, - "lastModified": { - "time": 1713189117302 - } + "removed": false } }, "systemMetadata": { - 
"lastObserved": 1718004101680, + "lastObserved": 1732513100094, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,OSnGLBzL1i)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -318,48 +305,76 @@ } }, "systemMetadata": { - "lastObserved": 1718004101680, + "lastObserved": 1718004101684, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "subTypes", "aspect": { "json": { - "platform": "urn:li:dataPlatform:sigma" + "typeNames": [ + "Sigma Workbook" + ] } }, "systemMetadata": { - "lastObserved": 1718004101681, + "lastObserved": 1732513100095, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "dashboardInfo", "aspect": { "json": { - "typeNames": [ - "Sigma Workbook" - ] + "customProperties": { + "path": "New Acryl Data", + "latestVersion": "2" + }, + "externalUrl": "https://app.sigmacomputing.com/acryldata/workbook/4JRFW1HThPI1K3YTjouXI7", + "title": "Acryl Workbook", + "description": "", + "charts": [], + "datasets": [], + "dashboards": [ + { + "sourceUrn": "urn:li:dashboard:(sigma,9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b)", + "destinationUrn": "urn:li:dashboard:(sigma,OSnGLBzL1i)" + }, + { + "sourceUrn": "urn:li:dashboard:(sigma,9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b)", + "destinationUrn": 
"urn:li:dashboard:(sigma,DFSieiAcgo)" + } + ], + "lastModified": { + "created": { + "time": 1713188691477, + "actor": "urn:li:corpuser:datahub" + }, + "lastModified": { + "time": 1713189117302, + "actor": "urn:li:corpuser:datahub" + } + } } }, "systemMetadata": { - "lastObserved": 1718004101681, + "lastObserved": 1732535136409, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -378,14 +393,14 @@ } }, "systemMetadata": { - "lastObserved": 1718004101682, + "lastObserved": 1732513100096, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b)", "changeType": "UPSERT", "aspectName": "globalTags", "aspect": { @@ -398,60 +413,7 @@ } }, "systemMetadata": { - "lastObserved": 1718004101683, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:b83da80a4d444484521d9f7aca958742" - } - }, - "systemMetadata": { - "lastObserved": 1718004101683, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b83da80a4d444484521d9f7aca958742", - "urn": "urn:li:container:b83da80a4d444484521d9f7aca958742" - } - ] - } - }, - "systemMetadata": 
{ - "lastObserved": 1718004101684, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(sigma,OSnGLBzL1i)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1718004101684, + "lastObserved": 1732513100096, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -473,6 +435,7 @@ "urn:li:chart:(sigma,Ml9C5ezT5W)" ], "datasets": [], + "dashboards": [], "lastModified": { "created": { "time": 0, @@ -491,47 +454,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(sigma,OSnGLBzL1i)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" - } - }, - "systemMetadata": { - "lastObserved": 1718004101686, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(sigma,OSnGLBzL1i)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b83da80a4d444484521d9f7aca958742", - "urn": "urn:li:container:b83da80a4d444484521d9f7aca958742" - }, - { - "id": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", - "urn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1718004101686, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "chart", "entityUrn": "urn:li:chart:(sigma,kH0MeihtGs)", @@ -582,17 +504,65 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(sigma,kH0MeihtGs)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,OSnGLBzL1i)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": 
"urn:li:container:b83da80a4d444484521d9f7aca958742", + "urn": "urn:li:container:b83da80a4d444484521d9f7aca958742" + }, + { + "id": "Acryl Workbook" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1732545849249, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + "container": "urn:li:container:b83da80a4d444484521d9f7aca958742" } }, "systemMetadata": { - "lastObserved": 1718004101689, + "lastObserved": 1732513100096, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:b83da80a4d444484521d9f7aca958742", + "urn": "urn:li:container:b83da80a4d444484521d9f7aca958742" + }, + { + "id": "Acryl Workbook" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1732545849248, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -717,14 +687,13 @@ "urn": "urn:li:container:b83da80a4d444484521d9f7aca958742" }, { - "id": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", - "urn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + "id": "Acryl Workbook" } ] } }, "systemMetadata": { - "lastObserved": 1718004101692, + "lastObserved": 1732545849252, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -778,22 +747,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(sigma,Ml9C5ezT5W)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" - } - }, - "systemMetadata": { - "lastObserved": 
1718004101695, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "chart", "entityUrn": "urn:li:chart:(sigma,Ml9C5ezT5W)", @@ -914,14 +867,13 @@ "urn": "urn:li:container:b83da80a4d444484521d9f7aca958742" }, { - "id": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", - "urn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + "id": "Acryl Workbook" } ] } }, "systemMetadata": { - "lastObserved": 1718004101697, + "lastObserved": 1732545849255, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -1155,6 +1107,7 @@ "urn:li:chart:(sigma,tQJu5N1l81)" ], "datasets": [], + "dashboards": [], "lastModified": { "created": { "time": 0, @@ -1174,17 +1127,17 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(sigma,DFSieiAcgo)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,tQJu5N1l81)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "status", "aspect": { "json": { - "container": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + "removed": false } }, "systemMetadata": { - "lastObserved": 1718004101704, + "lastObserved": 1718004101706, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -1202,30 +1155,13 @@ "urn": "urn:li:container:b83da80a4d444484521d9f7aca958742" }, { - "id": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", - "urn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + "id": "Acryl Workbook" } ] } }, "systemMetadata": { - "lastObserved": 1718004101704, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(sigma,tQJu5N1l81)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1718004101706, + "lastObserved": 1732545849260, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -1263,22 +1199,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "chart", - 
"entityUrn": "urn:li:chart:(sigma,tQJu5N1l81)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" - } - }, - "systemMetadata": { - "lastObserved": 1718004101708, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "chart", "entityUrn": "urn:li:chart:(sigma,tQJu5N1l81)", @@ -1474,14 +1394,13 @@ "urn": "urn:li:container:b83da80a4d444484521d9f7aca958742" }, { - "id": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", - "urn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + "id": "Acryl Workbook" } ] } }, "systemMetadata": { - "lastObserved": 1718004101712, + "lastObserved": 1732545849264, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } diff --git a/metadata-ingestion/tests/integration/sql_server/docker-compose.yml b/metadata-ingestion/tests/integration/sql_server/docker-compose.yml index 1046321e4f720..aed70503903c0 100644 --- a/metadata-ingestion/tests/integration/sql_server/docker-compose.yml +++ b/metadata-ingestion/tests/integration/sql_server/docker-compose.yml @@ -1,7 +1,7 @@ version: "3" services: testsqlserver: - image: "mcr.microsoft.com/mssql/server:latest" + image: "mcr.microsoft.com/mssql/server:2022-latest" platform: linux/amd64 container_name: "testsqlserver" environment: diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json index 4302c41140dc6..54821347fd28b 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "01afcab8-187c-459f-828e-727196a1832d", + "job_id": "4130c37d-146c-43da-a671-dd9a413a44b3", "job_name": 
"Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-21 21:01:26.550000", - "date_modified": "2024-11-21 21:01:26.690000", + "date_created": "2024-11-22 12:58:03.260000", + "date_modified": "2024-11-22 12:58:03.440000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -1496,6 +1496,138 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "age_dist", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "DemoData.Foo.age_dist", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "Age", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "Count", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + 
"nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + }, + { + "id": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "urn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Items,PROD)", @@ -2150,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-21 21:01:26.483000", - "date_modified": "2024-11-21 21:01:26.483000" + "date_created": "2024-11-22 12:58:03.137000", + "date_modified": "2024-11-22 12:58:03.137000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2168,14 +2300,24 @@ }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", + "entityUrn": 
"urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", "changeType": "UPSERT", - "aspectName": "dataJobInputOutput", + "aspectName": "dataJobInfo", "aspect": { "json": { - "inputDatasets": [], - "outputDatasets": [], - "inputDatajobs": [] + "customProperties": { + "procedure_depends_on": "{'DemoData.Foo.age_dist': 'USER_TABLE', 'DemoData.Foo.Items': 'USER_TABLE', 'DemoData.Foo.Persons': 'USER_TABLE', 'DemoData.Foo.SalesReason': 'USER_TABLE'}", + "depending_on_procedure": "{}", + "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n\n SELECT ID, Age INTO #TEMPTABLE FROM NewData.FooNew.PersonsNew\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID\n\n END\n", + "input parameters": "[]", + "date_created": "2024-11-22 12:58:03.140000", + "date_modified": "2024-11-22 12:58:03.140000" + }, + "externalUrl": "", + "name": "DemoData.Foo.NewProc", + "type": { + "string": "MSSQL_STORED_PROCEDURE" + } } }, "systemMetadata": { @@ -4256,6 +4398,159 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" 
+ } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "view_definition": "CREATE VIEW FooNew.View1 AS\nSELECT LastName, FirstName\nFROM FooNew.PersonsNew\nWHERE Age > 18\n", + "is_view": "True" + }, + "name": "View1", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "NewData.FooNew.View1", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "LastName", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "FirstName", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "CREATE VIEW FooNew.View1 AS\nSELECT LastName, FirstName\nFROM FooNew.PersonsNew\nWHERE Age > 18\n", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + }, + { + "id": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "urn": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", @@ -4611,6 +4906,55 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD),firstname)" + ], + "downstreamType": "FIELD", + "downstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD),firstname)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD),lastname)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD),lastname)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataFlow", "entityUrn": "urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD)", @@ -4643,6 +4987,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", @@ -4690,5 +5050,21 @@ "runId": "mssql-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json index 
0a50556edc638..1d702214fedf7 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "01afcab8-187c-459f-828e-727196a1832d", + "job_id": "2a055367-5e6a-4162-b3a9-dd60f52c79a8", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-21 21:01:26.550000", - "date_modified": "2024-11-21 21:01:26.690000", + "date_created": "2024-11-26 07:22:19.640000", + "date_modified": "2024-11-26 07:22:19.773000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -1496,6 +1496,138 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "age_dist", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "DemoData.Foo.age_dist", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + 
"com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "Age", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "Count", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + }, + { + "id": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "urn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Items,PROD)", @@ -2150,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-21 21:01:26.483000", - 
"date_modified": "2024-11-21 21:01:26.483000" + "date_created": "2024-11-26 07:22:19.510000", + "date_modified": "2024-11-26 07:22:19.510000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2166,24 +2298,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", - "changeType": "UPSERT", - "aspectName": "dataJobInputOutput", - "aspect": { - "json": { - "inputDatasets": [], - "outputDatasets": [], - "inputDatajobs": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:250ce23f940485303fa5e5d4f5194975", diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json index 0a50556edc638..3836e587ef8cf 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "01afcab8-187c-459f-828e-727196a1832d", + "job_id": "4130c37d-146c-43da-a671-dd9a413a44b3", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-21 21:01:26.550000", - "date_modified": "2024-11-21 21:01:26.690000", + "date_created": "2024-11-22 12:58:03.260000", + "date_modified": "2024-11-22 12:58:03.440000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -1496,6 +1496,138 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": 
"container", + "aspect": { + "json": { + "container": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "age_dist", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "DemoData.Foo.age_dist", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "Age", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "Count", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + }, + { + "id": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "urn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Items,PROD)", @@ -2150,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-21 21:01:26.483000", - "date_modified": "2024-11-21 21:01:26.483000" + "date_created": "2024-11-22 12:58:03.137000", + "date_modified": "2024-11-22 12:58:03.137000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2168,14 +2300,24 @@ }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", "changeType": "UPSERT", - "aspectName": "dataJobInputOutput", + "aspectName": "dataJobInfo", "aspect": { "json": { - "inputDatasets": [], - "outputDatasets": [], - "inputDatajobs": [] + "customProperties": { + "procedure_depends_on": "{'DemoData.Foo.age_dist': 'USER_TABLE', 'DemoData.Foo.Items': 'USER_TABLE', 'DemoData.Foo.Persons': 'USER_TABLE', 'DemoData.Foo.SalesReason': 'USER_TABLE'}", + "depending_on_procedure": "{}", + "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason 
table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n\n SELECT ID, Age INTO #TEMPTABLE FROM NewData.FooNew.PersonsNew\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID\n\n END\n", + "input parameters": "[]", + "date_created": "2024-11-22 12:58:03.140000", + "date_modified": "2024-11-22 12:58:03.140000" + }, + "externalUrl": "", + "name": "DemoData.Foo.NewProc", + "type": { + "string": "MSSQL_STORED_PROCEDURE" + } } }, "systemMetadata": { @@ -2571,6 +2713,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json index 0279a94084ce5..ebcadcc11dcbf 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json @@ -113,11 
+113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "01afcab8-187c-459f-828e-727196a1832d", + "job_id": "4130c37d-146c-43da-a671-dd9a413a44b3", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-21 21:01:26.550000", - "date_modified": "2024-11-21 21:01:26.690000", + "date_created": "2024-11-22 12:58:03.260000", + "date_modified": "2024-11-22 12:58:03.440000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -1496,6 +1496,138 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "age_dist", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "demodata.foo.age_dist", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "Age", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + 
"isPartOfKey": false + }, + { + "fieldPath": "Count", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + }, + { + "id": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "urn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)", @@ -2150,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-21 21:01:26.483000", - "date_modified": "2024-11-21 21:01:26.483000" + "date_created": "2024-11-22 12:58:03.137000", + "date_modified": "2024-11-22 12:58:03.137000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2168,14 +2300,24 @@ }, { "entityType": "dataJob", - "entityUrn": 
"urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", "changeType": "UPSERT", - "aspectName": "dataJobInputOutput", + "aspectName": "dataJobInfo", "aspect": { "json": { - "inputDatasets": [], - "outputDatasets": [], - "inputDatajobs": [] + "customProperties": { + "procedure_depends_on": "{'DemoData.Foo.age_dist': 'USER_TABLE', 'DemoData.Foo.Items': 'USER_TABLE', 'DemoData.Foo.Persons': 'USER_TABLE', 'DemoData.Foo.SalesReason': 'USER_TABLE'}", + "depending_on_procedure": "{}", + "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n\n SELECT ID, Age INTO #TEMPTABLE FROM NewData.FooNew.PersonsNew\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID\n\n END\n", + "input parameters": "[]", + "date_created": "2024-11-22 12:58:03.140000", + "date_modified": "2024-11-22 12:58:03.140000" + }, + "externalUrl": "", + "name": "DemoData.Foo.NewProc", + "type": { + "string": "MSSQL_STORED_PROCEDURE" + } } }, "systemMetadata": { @@ -2515,68 +2657,19 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": 
"containerProperties", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", - "type": "VIEW" - } - ], - "fineGrainedLineages": [ - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),Age)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),Age)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),FirstName)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),FirstName)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),ID)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),ID)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),LastName)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),LastName)" - ], - "confidenceScore": 1.0 - } - ] + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData" + }, + "name": "NewData", + "env": "PROD" } }, "systemMetadata": { @@ -2586,8 +2679,8 @@ } }, { - "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD)", + "entityType": "container", + "entityUrn": 
"urn:li:container:0a12bec9e9271b0db039923a770d75e5", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2602,8 +2695,2418 @@ } }, { - "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Database" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_accessadmin" + }, + "name": 
"db_accessadmin", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:523d13eddd725607ec835a2459b05c9c", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": 
"mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:523d13eddd725607ec835a2459b05c9c", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_backupoperator" + }, + "name": "db_backupoperator", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:523d13eddd725607ec835a2459b05c9c", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:523d13eddd725607ec835a2459b05c9c", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:523d13eddd725607ec835a2459b05c9c", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:523d13eddd725607ec835a2459b05c9c", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + 
"runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:29bd421b2225a415df9c750e77404c66", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:29bd421b2225a415df9c750e77404c66", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_datareader" + }, + "name": "db_datareader", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:29bd421b2225a415df9c750e77404c66", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:29bd421b2225a415df9c750e77404c66", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:29bd421b2225a415df9c750e77404c66", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": 
"container", + "entityUrn": "urn:li:container:29bd421b2225a415df9c750e77404c66", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a3c02df4bcc7280a89f539b793b04197", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a3c02df4bcc7280a89f539b793b04197", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_datawriter" + }, + "name": "db_datawriter", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a3c02df4bcc7280a89f539b793b04197", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a3c02df4bcc7280a89f539b793b04197", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a3c02df4bcc7280a89f539b793b04197", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a3c02df4bcc7280a89f539b793b04197", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c3b5d1cdc69a7d8faf0e1981e89b89d1", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c3b5d1cdc69a7d8faf0e1981e89b89d1", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_ddladmin" + }, + "name": "db_ddladmin", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c3b5d1cdc69a7d8faf0e1981e89b89d1", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + 
"lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c3b5d1cdc69a7d8faf0e1981e89b89d1", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c3b5d1cdc69a7d8faf0e1981e89b89d1", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c3b5d1cdc69a7d8faf0e1981e89b89d1", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:2b937d85ae7545dc769766008a332f42", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:2b937d85ae7545dc769766008a332f42", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_denydatareader" + }, + "name": "db_denydatareader", + "env": "PROD" + } + }, + "systemMetadata": { + 
"lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:2b937d85ae7545dc769766008a332f42", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:2b937d85ae7545dc769766008a332f42", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:2b937d85ae7545dc769766008a332f42", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:2b937d85ae7545dc769766008a332f42", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a399d8bb765028abb9e55ae39846ca5e", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + 
"entityType": "container", + "entityUrn": "urn:li:container:a399d8bb765028abb9e55ae39846ca5e", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_denydatawriter" + }, + "name": "db_denydatawriter", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a399d8bb765028abb9e55ae39846ca5e", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a399d8bb765028abb9e55ae39846ca5e", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a399d8bb765028abb9e55ae39846ca5e", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a399d8bb765028abb9e55ae39846ca5e", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, 
+{ + "entityType": "container", + "entityUrn": "urn:li:container:457efe38f0aec2af9ad681cf1b43b1cb", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:457efe38f0aec2af9ad681cf1b43b1cb", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_owner" + }, + "name": "db_owner", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:457efe38f0aec2af9ad681cf1b43b1cb", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:457efe38f0aec2af9ad681cf1b43b1cb", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:457efe38f0aec2af9ad681cf1b43b1cb", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": 
"urn:li:container:457efe38f0aec2af9ad681cf1b43b1cb", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1d87783ffe7e82210365dff4ca8ee7d1", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1d87783ffe7e82210365dff4ca8ee7d1", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_securityadmin" + }, + "name": "db_securityadmin", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1d87783ffe7e82210365dff4ca8ee7d1", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1d87783ffe7e82210365dff4ca8ee7d1", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + 
"entityType": "container", + "entityUrn": "urn:li:container:1d87783ffe7e82210365dff4ca8ee7d1", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1d87783ffe7e82210365dff4ca8ee7d1", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:269d0067d130eda0399a534fc787054c", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:269d0067d130eda0399a534fc787054c", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "dbo" + }, + "name": "dbo", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:269d0067d130eda0399a534fc787054c", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + 
"entityType": "container", + "entityUrn": "urn:li:container:269d0067d130eda0399a534fc787054c", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:269d0067d130eda0399a534fc787054c", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:269d0067d130eda0399a534fc787054c", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.dbo.productsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:269d0067d130eda0399a534fc787054c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.dbo.productsnew,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "ProductsNew", + "tags": [] + } + }, + { + 
"com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "newdata.dbo.productsnew", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "ID", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "ProductName", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "NVARCHAR(max) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "Price", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "MONEY", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.dbo.productsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.dbo.productsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + }, + { + "id": 
"urn:li:container:269d0067d130eda0399a534fc787054c", + "urn": "urn:li:container:269d0067d130eda0399a534fc787054c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "FooNew" + }, + "name": "FooNew", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + 
"typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.itemsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.itemsnew,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "ItemsNew", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "newdata.foonew.itemsnew", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "ID", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + 
"nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "ItemName", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "NVARCHAR(max) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "Price", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "SMALLMONEY", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.itemsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.itemsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + }, + { + "id": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "urn": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + }, + "systemMetadata": { + 
"lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "PersonsNew", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "newdata.foonew.personsnew", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "ID", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": true + }, + { + "fieldPath": "LastName", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "FirstName", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "Age", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + 
"lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + }, + { + "id": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "urn": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "view_definition": "CREATE VIEW FooNew.View1 AS\nSELECT LastName, FirstName\nFROM FooNew.PersonsNew\nWHERE Age > 18\n", + "is_view": "True" + }, + "name": "View1", + "tags": [] + } + }, + { + 
"com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "newdata.foonew.view1", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "LastName", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "FirstName", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "CREATE VIEW FooNew.View1 AS\nSELECT LastName, FirstName\nFROM FooNew.PersonsNew\nWHERE Age > 18\n", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + }, + { + "id": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "urn": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "guest" + }, + "name": "guest", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + 
"systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:752bb2abafeb2dae8f4adc7ffd547780", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:752bb2abafeb2dae8f4adc7ffd547780", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "INFORMATION_SCHEMA" + }, + "name": "INFORMATION_SCHEMA", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:752bb2abafeb2dae8f4adc7ffd547780", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + 
"removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:752bb2abafeb2dae8f4adc7ffd547780", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:752bb2abafeb2dae8f4adc7ffd547780", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:752bb2abafeb2dae8f4adc7ffd547780", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46b713e3c7754c51649899f0f284ce34", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46b713e3c7754c51649899f0f284ce34", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": 
"sys" + }, + "name": "sys", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46b713e3c7754c51649899f0f284ce34", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46b713e3c7754c51649899f0f284ce34", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46b713e3c7754c51649899f0f284ce34", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46b713e3c7754c51649899f0f284ce34", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),Age)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),Age)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),FirstName)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),FirstName)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),ID)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),ID)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),LastName)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),LastName)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD),FirstName)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD),FirstName)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD),LastName)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD),LastName)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)" + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),Age)" + ], + "downstreamType": "FIELD", + "downstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD),Age)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),TempID)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),tempid)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),Name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),name)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD),Age)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),Age)" + ], + "confidenceScore": 0.35 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + 
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", "changeType": "UPSERT", "aspectName": "status", "aspect": { diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/DemoData.Foo.NewProc.json b/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/DemoData.Foo.NewProc.json new file mode 100644 index 0000000000000..609e3a6f42945 --- /dev/null +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/DemoData.Foo.NewProc.json @@ -0,0 +1,57 @@ +[ +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)" + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),age)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD),age)" + ], + "confidenceScore": 0.2 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),tempid)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),tempid)" + ], + "confidenceScore": 0.2 + }, 
+ { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),name)" + ], + "confidenceScore": 0.2 + } + ] + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/demodata.foo.proc2.json b/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/demodata.foo.proc2.json new file mode 100644 index 0000000000000..8ebd1c065ebf9 --- /dev/null +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/demodata.foo.proc2.json @@ -0,0 +1,57 @@ +[ +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,demodata.foo.stored_procedures,PROD),proc2)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)" + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),age)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD),age)" + ], + "confidenceScore": 0.2 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),tempid)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),tempid)" + ], + "confidenceScore": 0.2 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),name)" + ], + "confidenceScore": 0.2 + } + ] + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/procedures/DemoData.Foo.NewProc.sql b/metadata-ingestion/tests/integration/sql_server/procedures/DemoData.Foo.NewProc.sql new file mode 100644 index 0000000000000..52a8d1327653b --- /dev/null +++ b/metadata-ingestion/tests/integration/sql_server/procedures/DemoData.Foo.NewProc.sql @@ -0,0 +1,37 @@ +CREATE PROCEDURE [Foo].[NewProc] + AS + BEGIN + --insert into items table from salesreason table + insert into Foo.Items (ID, ItemName) + SELECT TempID, Name + FROM Foo.SalesReason; + + + IF OBJECT_ID('Foo.age_dist', 'U') IS NULL + + BEGIN + -- Create and populate if table doesn't exist + SELECT Age, COUNT(*) as Count + INTO Foo.age_dist + FROM Foo.Persons + GROUP BY Age + END + ELSE + BEGIN + -- Update existing table + TRUNCATE TABLE Foo.age_dist; + + INSERT INTO Foo.age_dist (Age, Count) + SELECT Age, COUNT(*) as Count + FROM Foo.Persons + GROUP BY Age + END + + SELECT * INTO #TempTable FROM NewData.FooNew.PersonsNew + + UPDATE DemoData.Foo.Persons + SET Age = t.Age + FROM DemoData.Foo.Persons p + JOIN #TempTable t ON p.ID = t.ID + + END \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/procedures/demodata.foo.proc2.sql b/metadata-ingestion/tests/integration/sql_server/procedures/demodata.foo.proc2.sql new 
file mode 100644 index 0000000000000..69194a8d2c546 --- /dev/null +++ b/metadata-ingestion/tests/integration/sql_server/procedures/demodata.foo.proc2.sql @@ -0,0 +1,36 @@ +CREATE PROCEDURE [foo].[proc2] + AS + BEGIN + --insert into items table from salesreason table + insert into foo.items (id, itemame) + SELECT tempid, name + FROM foo.salesreason; + + + IF OBJECT_ID('foo.age_dist', 'U') IS NULL + + BEGIN + -- Create and populate if table doesn't exist + SELECT age, COUNT(*) as count + INTO foo.age_dist + FROM foo.persons + GROUP BY age + END + ELSE + BEGIN + -- Update existing table + TRUNCATE TABLE foo.age_dist; + + INSERT INTO foo.age_dist (age, count) + SELECT age, COUNT(*) as count + FROM foo.persons + GROUP BY age + END + + SELECT * INTO #temptable FROM newdata.foonew.personsnew + + UPDATE demodata.foo.persons + SET age = t.age + FROM demodata.foo.persons p + JOIN #temptable t ON p.ID = t.ID + END \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/setup/setup.sql b/metadata-ingestion/tests/integration/sql_server/setup/setup.sql index f495db3b91cfa..0c3c7ee2fd29e 100644 --- a/metadata-ingestion/tests/integration/sql_server/setup/setup.sql +++ b/metadata-ingestion/tests/integration/sql_server/setup/setup.sql @@ -1,3 +1,4 @@ +DROP DATABASE IF EXISTS NewData; CREATE DATABASE NewData; GO USE NewData; @@ -14,7 +15,14 @@ CREATE TABLE FooNew.PersonsNew ( FirstName varchar(255), Age int ); +GO +CREATE VIEW FooNew.View1 AS +SELECT LastName, FirstName +FROM FooNew.PersonsNew +WHERE Age > 18 +GO +DROP DATABASE IF EXISTS DemoData; CREATE DATABASE DemoData; GO USE DemoData; @@ -47,11 +55,54 @@ CREATE TABLE Foo.SalesReason ) ; GO +DROP PROCEDURE IF EXISTS [Foo].[Proc.With.SpecialChar]; +GO CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT AS SELECT @ID AS ThatDB; GO +DROP PROCEDURE IF EXISTS [Foo].[NewProc]; +GO +CREATE PROCEDURE [Foo].[NewProc] + AS + BEGIN + --insert into items table from salesreason table + insert into Foo.Items 
(ID, ItemName) + SELECT TempID, Name + FROM Foo.SalesReason; + + + IF OBJECT_ID('Foo.age_dist', 'U') IS NULL + BEGIN + -- Create and populate if table doesn't exist + SELECT Age, COUNT(*) as Count + INTO Foo.age_dist + FROM Foo.Persons + GROUP BY Age + END + ELSE + BEGIN + -- Update existing table + TRUNCATE TABLE Foo.age_dist; + + INSERT INTO Foo.age_dist (Age, Count) + SELECT Age, COUNT(*) as Count + FROM Foo.Persons + GROUP BY Age + END + + SELECT ID, Age INTO #TEMPTABLE FROM NewData.FooNew.PersonsNew + + UPDATE DemoData.Foo.Persons + SET Age = t.Age + FROM DemoData.Foo.Persons p + JOIN #TEMPTABLE t ON p.ID = t.ID + + END +GO + +EXEC Foo.NewProc GO EXEC sys.sp_addextendedproperty @name = N'MS_Description', @@ -93,4 +144,4 @@ EXEC sp_attach_schedule GO EXEC dbo.sp_add_jobserver @job_name = N'Weekly Demo Data Backup' -GO +GO \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/source_files/mssql_no_db_with_filter.yml b/metadata-ingestion/tests/integration/sql_server/source_files/mssql_no_db_with_filter.yml index 3749499074adf..703f60b277b87 100644 --- a/metadata-ingestion/tests/integration/sql_server/source_files/mssql_no_db_with_filter.yml +++ b/metadata-ingestion/tests/integration/sql_server/source_files/mssql_no_db_with_filter.yml @@ -9,6 +9,9 @@ source: database_pattern: deny: - NewData + procedure_pattern: + deny: + - DemoData.Foo.NewProc sink: type: file diff --git a/metadata-ingestion/tests/integration/sql_server/source_files/mssql_with_lower_case_urn.yml b/metadata-ingestion/tests/integration/sql_server/source_files/mssql_with_lower_case_urn.yml index ff1179034833f..94128810f026b 100644 --- a/metadata-ingestion/tests/integration/sql_server/source_files/mssql_with_lower_case_urn.yml +++ b/metadata-ingestion/tests/integration/sql_server/source_files/mssql_with_lower_case_urn.yml @@ -5,7 +5,6 @@ source: config: username: sa password: test!Password - database: DemoData host_port: localhost:21433 convert_urns_to_lowercase: 
true # use_odbc: True diff --git a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py index 1f418ffbd32ea..b969f77b4c3c1 100644 --- a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py +++ b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py @@ -1,9 +1,16 @@ import os +import pathlib import subprocess import time +from pathlib import Path import pytest +from datahub.ingestion.source.sql.mssql.job_models import StoredProcedure +from datahub.ingestion.source.sql.mssql.stored_procedure_lineage import ( + generate_procedure_lineage, +) +from datahub.sql_parsing.schema_resolver import SchemaResolver from tests.test_helpers import mce_helpers from tests.test_helpers.click_helpers import run_datahub_cmd from tests.test_helpers.docker_helpers import cleanup_image, wait_for_port @@ -57,3 +64,50 @@ def test_mssql_ingest(mssql_runner, pytestconfig, tmp_path, mock_time, config_fi r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['date_modified'\]", ], ) + + +PROCEDURE_SQLS_DIR = pathlib.Path(__file__).parent / "procedures" +PROCEDURES_GOLDEN_DIR = pathlib.Path(__file__).parent / "golden_files/procedures/" +procedure_sqls = [sql_file.name for sql_file in PROCEDURE_SQLS_DIR.iterdir()] + + +@pytest.mark.parametrize("procedure_sql_file", procedure_sqls) +@pytest.mark.integration +def test_stored_procedure_lineage( + pytestconfig: pytest.Config, procedure_sql_file: str +) -> None: + sql_file_path = PROCEDURE_SQLS_DIR / procedure_sql_file + procedure_code = sql_file_path.read_text() + + # Procedure file is named as .. 
+ splits = procedure_sql_file.split(".") + db = splits[0] + schema = splits[1] + name = splits[2] + + procedure = StoredProcedure( + db=db, + schema=schema, + name=name, + flow=None, # type: ignore # flow is not used in this test + code=procedure_code, + ) + data_job_urn = f"urn:li:dataJob:(urn:li:dataFlow:(mssql,{db}.{schema}.stored_procedures,PROD),{name})" + + schema_resolver = SchemaResolver(platform="mssql") + + mcps = list( + generate_procedure_lineage( + schema_resolver=schema_resolver, + procedure=procedure, + procedure_job_urn=data_job_urn, + is_temp_table=lambda name: "temp" in name.lower(), + ) + ) + mce_helpers.check_goldens_stream( + pytestconfig, + outputs=mcps, + golden_path=( + PROCEDURES_GOLDEN_DIR / Path(procedure_sql_file).with_suffix(".json") + ), + ) diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_split_statements.py b/metadata-ingestion/tests/unit/sql_parsing/test_split_statements.py new file mode 100644 index 0000000000000..06e0e84ede554 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/test_split_statements.py @@ -0,0 +1,51 @@ +from datahub.sql_parsing.split_statements import split_statements + + +def test_split_statements_complex() -> None: + test_sql = """ + CREATE TABLE Users (Id INT); + -- Comment here + INSERT INTO Users VALUES (1); + BEGIN + UPDATE Users SET Id = 2; + /* Multi-line + comment */ + DELETE FROM /* inline DELETE comment */ Users; + END + GO + SELECT * FROM Users + """ + + statements = [statement.strip() for statement in split_statements(test_sql)] + assert statements == [ + "CREATE TABLE Users (Id INT)", + "-- Comment here", + "INSERT INTO Users VALUES (1)", + "BEGIN", + "UPDATE Users SET Id = 2", + "/* Multi-line\n comment */", + "DELETE FROM /* inline DELETE comment */ Users", + "END", + "GO", + "SELECT * FROM Users", + ] + + +def test_split_statements_cte() -> None: + # SQL example from https://stackoverflow.com/a/11562724 + test_sql = """\ +WITH T AS +( SELECT InvoiceNumber, + DocTotal, + 
SUM(Sale + VAT) OVER(PARTITION BY InvoiceNumber) AS NewDocTotal + FROM PEDI_InvoiceDetail +) +-- comment +/* multi-line +comment */ +UPDATE T +SET DocTotal = NewDocTotal""" + statements = [statement.strip() for statement in split_statements(test_sql)] + assert statements == [ + test_sql, + ] diff --git a/metadata-ingestion/tests/unit/test_kafka_source.py b/metadata-ingestion/tests/unit/test_kafka_source.py index dfd32085b7705..cab0a2bce7ba8 100644 --- a/metadata-ingestion/tests/unit/test_kafka_source.py +++ b/metadata-ingestion/tests/unit/test_kafka_source.py @@ -10,6 +10,7 @@ ) from freezegun import freeze_time +from datahub.configuration.common import ConfigurationError from datahub.emitter.mce_builder import ( OwnerType, make_dataplatform_instance_urn, @@ -738,3 +739,23 @@ def mock_get_latest_version(subject_name: str) -> Optional[RegisteredSchema]: assert workunits[7].metadata.aspectName == "glossaryTermKey" assert workunits[8].metadata.aspectName == "tagKey" assert workunits[9].metadata.aspectName == "tagKey" + + +def test_kafka_source_oauth_cb_configuration(): + with pytest.raises( + ConfigurationError, + match=( + "oauth_cb must be a string representing python function reference " + "in the format :." 
+ ), + ): + KafkaSourceConfig.parse_obj( + { + "connection": { + "bootstrap": "foobar:9092", + "consumer_config": { + "oauth_cb": test_kafka_ignore_warnings_on_schema_type + }, + } + } + ) diff --git a/metadata-io/build.gradle b/metadata-io/build.gradle index 09a41d100199d..41294fab7b24a 100644 --- a/metadata-io/build.gradle +++ b/metadata-io/build.gradle @@ -140,6 +140,8 @@ test { } testLogging.showStandardStreams = true testLogging.exceptionFormat = 'full' + + environment 'STRICT_URN_VALIDATION_ENABLED', 'true' } ebean { diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java index 7f56abe64f9a7..c0d65640df237 100644 --- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java @@ -47,7 +47,8 @@ public class AspectsBatchImpl implements AspectsBatch { */ @Override public Pair>, List> toUpsertBatchItems( - final Map> latestAspects) { + Map> latestAspects, + Map> nextVersions) { // Process proposals to change items Stream mutatedProposalsStream = @@ -56,6 +57,7 @@ public Pair>, List> toUpsertBatchItems( .filter(item -> item instanceof ProposedItem) .map(item -> (MCPItem) item) .collect(Collectors.toList())); + // Regular change items Stream changeMCPStream = items.stream().filter(item -> !(item instanceof ProposedItem)); @@ -83,10 +85,8 @@ public Pair>, List> toUpsertBatchItems( currentValue, retrieverContext.getAspectRetriever()); } - // Populate old aspect for write hooks - upsertItem.setPreviousSystemAspect(latest); - - return upsertItem; + return AspectsBatch.incrementBatchVersion( + upsertItem, latestAspects, nextVersions); }) .collect(Collectors.toCollection(LinkedList::new)); @@ -96,6 +96,7 @@ public Pair>, List> toUpsertBatchItems( 
LinkedList newItems = applyMCPSideEffects(upsertBatchItems).collect(Collectors.toCollection(LinkedList::new)); upsertBatchItems.addAll(newItems); + Map> newUrnAspectNames = getNewUrnAspectsMap(getUrnAspectsMap(), upsertBatchItems); diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/ValidationApiUtils.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/ValidationApiUtils.java index f7e639ecf3603..c2e1c47eca1fd 100644 --- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/ValidationApiUtils.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/ValidationApiUtils.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.entity.validation; import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.schema.validation.ValidationResult; import com.linkedin.data.template.RecordTemplate; import com.linkedin.metadata.Constants; @@ -10,16 +11,26 @@ import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; import java.net.URISyntaxException; +import java.net.URLDecoder; import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Set; import java.util.function.Consumer; +import java.util.stream.Collectors; +import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; @Slf4j public class ValidationApiUtils { + public static final String STRICT_URN_VALIDATION_ENABLED = "STRICT_URN_VALIDATION_ENABLED"; public static final int URN_NUM_BYTES_LIMIT = 512; + // Related to BrowsePathv2 public static final String URN_DELIMITER_SEPARATOR = "␟"; + // https://datahubproject.io/docs/what/urn/#restrictions + public static final Set ILLEGAL_URN_COMPONENT_CHARACTERS = Set.of(":", "(", ")", ","); /** * Validates a {@link RecordTemplate} 
and throws {@link ValidationException} if validation fails. @@ -38,6 +49,16 @@ public static void validateOrThrow(RecordTemplate record) { } public static void validateUrn(@Nonnull EntityRegistry entityRegistry, @Nonnull final Urn urn) { + validateUrn( + entityRegistry, + urn, + Boolean.TRUE.equals( + Boolean.parseBoolean( + System.getenv().getOrDefault(STRICT_URN_VALIDATION_ENABLED, "false")))); + } + + public static void validateUrn( + @Nonnull EntityRegistry entityRegistry, @Nonnull final Urn urn, boolean strict) { EntityRegistryUrnValidator validator = new EntityRegistryUrnValidator(entityRegistry); validator.setCurrentEntitySpec(entityRegistry.getEntitySpec(urn.getEntityType())); RecordTemplateValidator.validate( @@ -59,10 +80,32 @@ public static void validateUrn(@Nonnull EntityRegistry entityRegistry, @Nonnull + Integer.toString(URN_NUM_BYTES_LIMIT) + " bytes (when URL encoded)"); } + if (urn.toString().contains(URN_DELIMITER_SEPARATOR)) { throw new IllegalArgumentException( "Error: URN cannot contain " + URN_DELIMITER_SEPARATOR + " character"); } + + List illegalComponents = + urn.getEntityKey().getParts().stream() + .flatMap(ValidationApiUtils::processUrnPartRecursively) + .filter( + urnPart -> ILLEGAL_URN_COMPONENT_CHARACTERS.stream().anyMatch(urnPart::contains)) + .collect(Collectors.toList()); + + if (!illegalComponents.isEmpty()) { + String message = + String.format( + "Illegal `%s` characters detected in URN %s component(s): %s", + ILLEGAL_URN_COMPONENT_CHARACTERS, urn, illegalComponents); + + if (strict) { + throw new IllegalArgumentException(message); + } else { + log.error(message); + } + } + try { Urn.createFromString(urn.toString()); } catch (URISyntaxException e) { @@ -70,6 +113,18 @@ public static void validateUrn(@Nonnull EntityRegistry entityRegistry, @Nonnull } } + /** Recursively process URN parts with URL decoding */ + private static Stream processUrnPartRecursively(String urnPart) { + String decodedPart = + 
URLDecoder.decode(URLEncodingFixer.fixURLEncoding(urnPart), StandardCharsets.UTF_8); + if (decodedPart.startsWith("urn:li:")) { + // Recursively process nested URN after decoding + return UrnUtils.getUrn(decodedPart).getEntityKey().getParts().stream() + .flatMap(ValidationApiUtils::processUrnPartRecursively); + } + return Stream.of(decodedPart); + } + /** * Validates a {@link RecordTemplate} and logs a warning if validation fails. * @@ -123,4 +178,53 @@ public static void validateRecordTemplate( RecordTemplateValidator.validate(aspect, resultFunction, validator); } } + + /** + * Fixes malformed URL encoding by escaping unescaped % characters while preserving valid + * percent-encoded sequences. + */ + private static class URLEncodingFixer { + /** + * @param input The potentially malformed URL-encoded string + * @return A string with proper URL encoding that can be safely decoded + */ + public static String fixURLEncoding(String input) { + if (input == null) { + return null; + } + + StringBuilder result = new StringBuilder(input.length() * 2); + int i = 0; + + while (i < input.length()) { + char currentChar = input.charAt(i); + + if (currentChar == '%') { + if (i + 2 < input.length()) { + // Check if the next two characters form a valid hex pair + String hexPair = input.substring(i + 1, i + 3); + if (isValidHexPair(hexPair)) { + // This is a valid percent-encoded sequence, keep it as is + result.append(currentChar); + } else { + // Invalid sequence, escape the % character + result.append("%25"); + } + } else { + // % at the end of string, escape it + result.append("%25"); + } + } else { + result.append(currentChar); + } + i++; + } + + return result.toString(); + } + + private static boolean isValidHexPair(String pair) { + return pair.matches("[0-9A-Fa-f]{2}"); + } + } } diff --git a/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImplTest.java 
b/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImplTest.java index 31dd868b4cb4a..96f535f2295aa 100644 --- a/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImplTest.java +++ b/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImplTest.java @@ -41,6 +41,7 @@ import io.datahubproject.metadata.context.RetrieverContext; import java.nio.charset.StandardCharsets; import java.util.Collection; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.stream.Stream; @@ -120,7 +121,7 @@ public void toUpsertBatchItemsChangeItemTest() { AspectsBatchImpl.builder().items(testItems).retrieverContext(retrieverContext).build(); assertEquals( - testBatch.toUpsertBatchItems(Map.of()), + testBatch.toUpsertBatchItems(new HashMap<>(), new HashMap<>()), Pair.of(Map.of(), testItems), "Expected noop, pass through with no additional MCPs or changes"); } @@ -176,7 +177,7 @@ public void toUpsertBatchItemsPatchItemTest() { AspectsBatchImpl.builder().items(testItems).retrieverContext(retrieverContext).build(); assertEquals( - testBatch.toUpsertBatchItems(Map.of()), + testBatch.toUpsertBatchItems(new HashMap<>(), new HashMap<>()), Pair.of( Map.of(), List.of( @@ -195,7 +196,7 @@ public void toUpsertBatchItemsPatchItemTest() { .recordTemplate( new StructuredProperties() .setProperties(new StructuredPropertyValueAssignmentArray())) - .systemMetadata(testItems.get(0).getSystemMetadata()) + .systemMetadata(testItems.get(0).getSystemMetadata().setVersion("1")) .build(mockAspectRetriever), ChangeItemImpl.builder() .urn( @@ -212,7 +213,7 @@ public void toUpsertBatchItemsPatchItemTest() { .recordTemplate( new StructuredProperties() .setProperties(new StructuredPropertyValueAssignmentArray())) - .systemMetadata(testItems.get(1).getSystemMetadata()) + .systemMetadata(testItems.get(1).getSystemMetadata().setVersion("1")) 
.build(mockAspectRetriever))), "Expected patch items converted to upsert change items"); } @@ -264,7 +265,7 @@ public void toUpsertBatchItemsProposedItemTest() { AspectsBatchImpl.builder().items(testItems).retrieverContext(retrieverContext).build(); assertEquals( - testBatch.toUpsertBatchItems(Map.of()), + testBatch.toUpsertBatchItems(new HashMap<>(), new HashMap<>()), Pair.of( Map.of(), List.of( @@ -280,7 +281,7 @@ public void toUpsertBatchItemsProposedItemTest() { .getEntitySpec(DATASET_ENTITY_NAME) .getAspectSpec(STATUS_ASPECT_NAME)) .auditStamp(AuditStampUtils.createDefaultAuditStamp()) - .systemMetadata(testItems.get(0).getSystemMetadata()) + .systemMetadata(testItems.get(0).getSystemMetadata().setVersion("1")) .recordTemplate(new Status().setRemoved(false)) .build(mockAspectRetriever), ChangeItemImpl.builder() @@ -295,7 +296,7 @@ public void toUpsertBatchItemsProposedItemTest() { .getEntitySpec(DATASET_ENTITY_NAME) .getAspectSpec(STATUS_ASPECT_NAME)) .auditStamp(AuditStampUtils.createDefaultAuditStamp()) - .systemMetadata(testItems.get(1).getSystemMetadata()) + .systemMetadata(testItems.get(1).getSystemMetadata().setVersion("1")) .recordTemplate(new Status().setRemoved(false)) .build(mockAspectRetriever))), "Mutation to status aspect"); @@ -328,7 +329,7 @@ public void singleInvalidDoesntBreakBatch() { .build(); assertEquals( - testBatch.toUpsertBatchItems(Map.of()).getSecond().size(), + testBatch.toUpsertBatchItems(new HashMap<>(), new HashMap<>()).getSecond().size(), 1, "Expected 1 valid mcp to be passed through."); } diff --git a/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/validation/ValidationApiUtilsTest.java b/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/validation/ValidationApiUtilsTest.java new file mode 100644 index 0000000000000..e683e594d8766 --- /dev/null +++ b/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/validation/ValidationApiUtilsTest.java @@ -0,0 +1,112 @@ +package 
com.linkedin.metadata.entity.validation; + +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.metadata.models.registry.EntityRegistry; +import io.datahubproject.test.metadata.context.TestOperationContexts; +import java.net.URISyntaxException; +import org.testng.annotations.Test; + +public class ValidationApiUtilsTest { + private static final EntityRegistry entityRegistry = + TestOperationContexts.defaultEntityRegistry(); + + @Test + public void testValidateDatasetUrn() { + Urn validUrn = UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hdfs,/path/to/data,PROD)"); + ValidationApiUtils.validateUrn(entityRegistry, validUrn, true); + // If no exception is thrown, test passes + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testSimpleUrnColon() { + Urn invalidUrn = UrnUtils.getUrn("urn:li:corpuser:foo:bar"); + ValidationApiUtils.validateUrn(entityRegistry, invalidUrn, true); + } + + @Test + public void testComplexUrnColon() throws URISyntaxException { + Urn validUrn = + Urn.createFromString( + "urn:li:dataset:(urn:li:dataPlatform:s3,urn:li:dataset:%28urn:li:dataPlatform:s3%2Ctest-datalake-concepts/prog_maintenance%2CPROD%29,PROD)"); + ValidationApiUtils.validateUrn(entityRegistry, validUrn, true); + // If no exception is thrown, test passes + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testUrnFabricType() { + Urn invalidUrn = UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hdfs,/path/to/data,())"); + ValidationApiUtils.validateUrn(entityRegistry, invalidUrn, true); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testUrnWithTrailingWhitespace() { + Urn invalidUrn = + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hdfs,/path/to/data,PROD) "); + ValidationApiUtils.validateUrn(entityRegistry, invalidUrn, true); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void 
testUrnWithIllegalDelimiter() { + Urn invalidUrn = UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hdfs␟path,PROD)"); + ValidationApiUtils.validateUrn(entityRegistry, invalidUrn, true); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testComplexUrnWithParens() { + Urn invalidUrn = UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hdfs,(illegal),PROD)"); + ValidationApiUtils.validateUrn(entityRegistry, invalidUrn, true); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testSimpleUrnWithParens() { + Urn invalidUrn = UrnUtils.getUrn("urn:li:corpuser:(foo)123"); + ValidationApiUtils.validateUrn(entityRegistry, invalidUrn, true); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testExcessiveLength() { + StringBuilder longPath = new StringBuilder("urn:li:dataset:(urn:li:dataPlatform:hdfs,"); + // Create a path that will exceed 512 bytes when URL encoded + for (int i = 0; i < 500; i++) { + longPath.append("very/long/path/"); + } + longPath.append(",PROD)"); + Urn invalidUrn = UrnUtils.getUrn(longPath.toString()); + + ValidationApiUtils.validateUrn(entityRegistry, invalidUrn, true); + } + + @Test + public void testValidComplexUrn() { + Urn validUrn = + UrnUtils.getUrn( + "urn:li:dataset:(urn:li:dataPlatform:bigquery,myproject.dataset.table,PROD)"); + + ValidationApiUtils.validateUrn(entityRegistry, validUrn); + // If no exception is thrown, test passes + } + + @Test(expectedExceptions = NullPointerException.class) + public void testUrnNull() { + ValidationApiUtils.validateUrn(entityRegistry, null); + } + + @Test + public void testValidPartialUrlEncode() { + Urn validUrn = UrnUtils.getUrn("urn:li:assertion:123=-%28__% weekly__%29"); + + ValidationApiUtils.validateUrn(entityRegistry, validUrn); + // If no exception is thrown, test passes + } + + @Test + public void testValidPartialUrlEncode2() { + Urn validUrn = + UrnUtils.getUrn( + 
"urn:li:dataset:(urn:li:dataPlatform:s3,urn:li:dataset:%28urn:li:dataPlatform:s3%2Ctest-datalake-concepts%prog_maintenance%2CPROD%29,PROD)"); + + ValidationApiUtils.validateUrn(entityRegistry, validUrn); + // If no exception is thrown, test passes + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffect.java b/metadata-io/src/main/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffect.java index 544040d14f8b7..dae1a8ff51a2c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffect.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffect.java @@ -9,6 +9,7 @@ import com.linkedin.dataproduct.DataProductAssociation; import com.linkedin.dataproduct.DataProductAssociationArray; import com.linkedin.dataproduct.DataProductProperties; +import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.aspect.RetrieverContext; import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.aspect.batch.MCLItem; @@ -27,9 +28,11 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; import lombok.Getter; @@ -64,71 +67,108 @@ private static Stream generatePatchRemove( MCLItem mclItem, @Nonnull RetrieverContext retrieverContext) { if (DATA_PRODUCT_PROPERTIES_ASPECT_NAME.equals(mclItem.getAspectName())) { - List mcpItems = new ArrayList<>(); + DataProductProperties dataProductProperties = mclItem.getAspect(DataProductProperties.class); if (dataProductProperties == null) { log.error("Unable to process data product properties for urn: {}", mclItem.getUrn()); return Stream.empty(); } - for (DataProductAssociation 
dataProductAssociation : + DataProductAssociationArray newDataProductAssociationArray = Optional.ofNullable(dataProductProperties.getAssets()) - .orElse(new DataProductAssociationArray())) { - RelatedEntitiesScrollResult result = - retrieverContext - .getGraphRetriever() - .scrollRelatedEntities( - null, - QueryUtils.newFilter( - "urn", dataProductAssociation.getDestinationUrn().toString()), - null, - EMPTY_FILTER, - ImmutableList.of("DataProductContains"), - QueryUtils.newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING), - Collections.emptyList(), - null, - 10, // Should only ever be one, if ever greater than ten will decrease over time - // to become consistent - null, - null); - if (!result.getEntities().isEmpty()) { - for (RelatedEntities entity : result.getEntities()) { - if (!mclItem.getUrn().equals(UrnUtils.getUrn(entity.getSourceUrn()))) { - EntitySpec entitySpec = - retrieverContext - .getAspectRetriever() - .getEntityRegistry() - .getEntitySpec(DATA_PRODUCT_ENTITY_NAME); - GenericJsonPatch.PatchOp patchOp = new GenericJsonPatch.PatchOp(); - patchOp.setOp(PatchOperationType.REMOVE.getValue()); - patchOp.setPath(String.format("/assets/%s", entity.getDestinationUrn())); - mcpItems.add( - PatchItemImpl.builder() - .urn(UrnUtils.getUrn(entity.getSourceUrn())) - .entitySpec( - retrieverContext - .getAspectRetriever() - .getEntityRegistry() - .getEntitySpec(DATA_PRODUCT_ENTITY_NAME)) - .aspectName(DATA_PRODUCT_PROPERTIES_ASPECT_NAME) - .aspectSpec(entitySpec.getAspectSpec(DATA_PRODUCT_PROPERTIES_ASPECT_NAME)) - .patch( - GenericJsonPatch.builder() - .arrayPrimaryKeys( - Map.of( - DataProductPropertiesTemplate.ASSETS_FIELD_NAME, - List.of(DataProductPropertiesTemplate.KEY_FIELD_NAME))) - .patch(List.of(patchOp)) - .build() - .getJsonPatch()) - .auditStamp(mclItem.getAuditStamp()) - .systemMetadata(mclItem.getSystemMetadata()) - .build(retrieverContext.getAspectRetriever().getEntityRegistry())); - } + .orElse(new DataProductAssociationArray()); + + 
DataProductProperties previousDataProductProperties = + mclItem.getPreviousAspect(DataProductProperties.class); + + if (!ChangeType.UPSERT.equals(mclItem.getChangeType()) + || previousDataProductProperties == null) { + // CREATE/CREATE_ENTITY/RESTATE + return generateUnsetMCPs(mclItem, newDataProductAssociationArray, retrieverContext); + } else { + // UPSERT with previous + DataProductAssociationArray oldDataProductAssociationArray = + Optional.ofNullable(previousDataProductProperties.getAssets()) + .orElse(new DataProductAssociationArray()); + + DataProductAssociationArray additions = + newDataProductAssociationArray.stream() + .filter(association -> !oldDataProductAssociationArray.contains(association)) + .collect(Collectors.toCollection(DataProductAssociationArray::new)); + + return generateUnsetMCPs(mclItem, additions, retrieverContext); + } + } + return Stream.empty(); + } + + private static Stream generateUnsetMCPs( + @Nonnull MCLItem dataProductItem, + @Nonnull DataProductAssociationArray dataProductAssociations, + @Nonnull RetrieverContext retrieverContext) { + List mcpItems = new ArrayList<>(); + Map> patchOpMap = new HashMap<>(); + + for (DataProductAssociation dataProductAssociation : dataProductAssociations) { + RelatedEntitiesScrollResult result = + retrieverContext + .getGraphRetriever() + .scrollRelatedEntities( + null, + QueryUtils.newFilter( + "urn", dataProductAssociation.getDestinationUrn().toString()), + null, + EMPTY_FILTER, + ImmutableList.of("DataProductContains"), + QueryUtils.newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING), + Collections.emptyList(), + null, + 10, // Should only ever be one, if ever greater than ten will decrease over time + // to become consistent + null, + null); + if (!result.getEntities().isEmpty()) { + for (RelatedEntities entity : result.getEntities()) { + if (!dataProductItem.getUrn().equals(UrnUtils.getUrn(entity.getSourceUrn()))) { + GenericJsonPatch.PatchOp patchOp = new 
GenericJsonPatch.PatchOp(); + patchOp.setOp(PatchOperationType.REMOVE.getValue()); + patchOp.setPath(String.format("/assets/%s", entity.getDestinationUrn())); + patchOpMap + .computeIfAbsent(entity.getSourceUrn(), urn -> new ArrayList<>()) + .add(patchOp); } } } - return mcpItems.stream(); } - return Stream.empty(); + for (String urn : patchOpMap.keySet()) { + EntitySpec entitySpec = + retrieverContext + .getAspectRetriever() + .getEntityRegistry() + .getEntitySpec(DATA_PRODUCT_ENTITY_NAME); + mcpItems.add( + PatchItemImpl.builder() + .urn(UrnUtils.getUrn(urn)) + .entitySpec( + retrieverContext + .getAspectRetriever() + .getEntityRegistry() + .getEntitySpec(DATA_PRODUCT_ENTITY_NAME)) + .aspectName(DATA_PRODUCT_PROPERTIES_ASPECT_NAME) + .aspectSpec(entitySpec.getAspectSpec(DATA_PRODUCT_PROPERTIES_ASPECT_NAME)) + .patch( + GenericJsonPatch.builder() + .arrayPrimaryKeys( + Map.of( + DataProductPropertiesTemplate.ASSETS_FIELD_NAME, + List.of(DataProductPropertiesTemplate.KEY_FIELD_NAME))) + .patch(patchOpMap.get(urn)) + .build() + .getJsonPatch()) + .auditStamp(dataProductItem.getAuditStamp()) + .systemMetadata(dataProductItem.getSystemMetadata()) + .build(retrieverContext.getAspectRetriever().getEntityRegistry())); + } + + return mcpItems.stream(); } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index 9337ea3c2b6f7..a0a55cf505cf3 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -868,71 +868,64 @@ private List ingestAspectsToLocalDB( // Read before write is unfortunate, however batch it final Map> urnAspects = batchWithDefaults.getUrnAspectsMap(); // read #1 - final Map> latestAspects = + Map> databaseAspects = + aspectDao.getLatestAspects(urnAspects, true); + + final Map> batchAspects = EntityUtils.toSystemAspects( - 
opContext.getRetrieverContext().get(), - aspectDao.getLatestAspects(urnAspects, true)); + opContext.getRetrieverContext().get(), databaseAspects); + // read #2 (potentially) final Map> nextVersions = - EntityUtils.calculateNextVersions( - txContext, aspectDao, latestAspects, urnAspects); + EntityUtils.calculateNextVersions(txContext, aspectDao, batchAspects, urnAspects); // 1. Convert patches to full upserts // 2. Run any entity/aspect level hooks Pair>, List> updatedItems = - batchWithDefaults.toUpsertBatchItems(latestAspects); + batchWithDefaults.toUpsertBatchItems(batchAspects, nextVersions); // Fetch additional information if needed - final Map> updatedLatestAspects; - final Map> updatedNextVersions; + final List changeMCPs; + if (!updatedItems.getFirst().isEmpty()) { + // These items are new items from side effects + Map> sideEffects = updatedItems.getFirst(); + + final Map> updatedLatestAspects; + final Map> updatedNextVersions; + Map> newLatestAspects = EntityUtils.toSystemAspects( opContext.getRetrieverContext().get(), aspectDao.getLatestAspects(updatedItems.getFirst(), true)); // merge - updatedLatestAspects = AspectsBatch.merge(latestAspects, newLatestAspects); + updatedLatestAspects = AspectsBatch.merge(batchAspects, newLatestAspects); Map> newNextVersions = EntityUtils.calculateNextVersions( txContext, aspectDao, updatedLatestAspects, updatedItems.getFirst()); // merge updatedNextVersions = AspectsBatch.merge(nextVersions, newNextVersions); + + changeMCPs = + updatedItems.getSecond().stream() + .peek( + changeMCP -> { + // Add previous version to each side-effect + if (sideEffects + .getOrDefault( + changeMCP.getUrn().toString(), Collections.emptySet()) + .contains(changeMCP.getAspectName())) { + + AspectsBatch.incrementBatchVersion( + changeMCP, updatedLatestAspects, updatedNextVersions); + } + }) + .collect(Collectors.toList()); } else { - updatedLatestAspects = latestAspects; - updatedNextVersions = nextVersions; + changeMCPs = 
updatedItems.getSecond(); } - // Add previous version to each upsert - List changeMCPs = - updatedItems.getSecond().stream() - .peek( - changeMCP -> { - String urnStr = changeMCP.getUrn().toString(); - long nextVersion = - updatedNextVersions - .getOrDefault(urnStr, Map.of()) - .getOrDefault(changeMCP.getAspectName(), 0L); - - changeMCP.setPreviousSystemAspect( - updatedLatestAspects - .getOrDefault(urnStr, Map.of()) - .getOrDefault(changeMCP.getAspectName(), null)); - - changeMCP.setNextAspectVersion(nextVersion); - - // support inner-batch upserts - updatedLatestAspects - .computeIfAbsent(urnStr, key -> new HashMap<>()) - .put( - changeMCP.getAspectName(), - changeMCP.getSystemAspect(nextVersion)); - updatedNextVersions - .computeIfAbsent(urnStr, key -> new HashMap<>()) - .put(changeMCP.getAspectName(), nextVersion + 1); - }) - .collect(Collectors.toList()); - // No changes, return if (changeMCPs.isEmpty()) { return Collections.emptyList(); @@ -954,40 +947,50 @@ private List ingestAspectsToLocalDB( List upsertResults = changeMCPs.stream() .map( - item -> { - final EntityAspect.EntitySystemAspect latest = - (EntityAspect.EntitySystemAspect) item.getPreviousSystemAspect(); + writeItem -> { + + /* + database*Aspect - should be used for comparisons of before batch operation information + */ + final EntityAspect databaseAspect = + databaseAspects + .getOrDefault(writeItem.getUrn().toString(), Map.of()) + .get(writeItem.getAspectName()); + final EntityAspect.EntitySystemAspect databaseSystemAspect = + databaseAspect == null + ? 
null + : EntityAspect.EntitySystemAspect.builder() + .build( + writeItem.getEntitySpec(), + writeItem.getAspectSpec(), + databaseAspect); final UpdateAspectResult result; - if (overwrite || latest == null) { + /* + This condition is specifically for an older conditional write ingestAspectIfNotPresent() + overwrite is always true otherwise + */ + if (overwrite || databaseAspect == null) { result = - ingestAspectToLocalDB( - txContext, - item.getUrn(), - item.getAspectName(), - item.getRecordTemplate(), - item.getAuditStamp(), - item.getSystemMetadata(), - latest == null ? null : latest, - item.getNextAspectVersion()) + ingestAspectToLocalDB(txContext, writeItem, databaseSystemAspect) .toBuilder() - .request(item) + .request(writeItem) .build(); } else { - RecordTemplate oldValue = latest.getRecordTemplate(); - SystemMetadata oldMetadata = latest.getSystemMetadata(); + RecordTemplate oldValue = databaseSystemAspect.getRecordTemplate(); + SystemMetadata oldMetadata = databaseSystemAspect.getSystemMetadata(); result = UpdateAspectResult.builder() - .urn(item.getUrn()) - .request(item) + .urn(writeItem.getUrn()) + .request(writeItem) .oldValue(oldValue) .newValue(oldValue) .oldSystemMetadata(oldMetadata) .newSystemMetadata(oldMetadata) .operation(MetadataAuditOperation.UPDATE) - .auditStamp(item.getAuditStamp()) - .maxVersion(latest.getVersion()) + .auditStamp(writeItem.getAuditStamp()) + .maxVersion(databaseAspect.getVersion()) .build(); } @@ -1011,8 +1014,8 @@ private List ingestAspectsToLocalDB( // Only consider retention when there was a previous version .filter( result -> - latestAspects.containsKey(result.getUrn().toString()) - && latestAspects + batchAspects.containsKey(result.getUrn().toString()) + && batchAspects .get(result.getUrn().toString()) .containsKey(result.getRequest().getAspectName())) .filter( @@ -1102,9 +1105,11 @@ private List emitMCL( * @param auditStamp an {@link AuditStamp} containing metadata about the writer & current time * @param 
systemMetadata * @return the {@link RecordTemplate} representation of the written aspect object + * @deprecated See Conditional Write ChangeType CREATE */ @Nullable @Override + @Deprecated public RecordTemplate ingestAspectIfNotPresent( @Nonnull OperationContext opContext, @Nonnull Urn urn, @@ -2495,87 +2500,107 @@ private Map getEnvelopedAspects( ((EntityAspect.EntitySystemAspect) systemAspect).toEnvelopedAspects())); } + /** + * @param txContext Transaction context, keeps track of retries, exceptions etc. + * @param writeItem The aspect being written + * @param databaseAspect The aspect as it exists in the database. + * @return result object + */ @Nonnull private UpdateAspectResult ingestAspectToLocalDB( @Nullable TransactionContext txContext, - @Nonnull final Urn urn, - @Nonnull final String aspectName, - @Nonnull final RecordTemplate newValue, - @Nonnull final AuditStamp auditStamp, - @Nonnull final SystemMetadata providedSystemMetadata, - @Nullable final EntityAspect.EntitySystemAspect latest, - @Nonnull final Long nextVersion) { + @Nonnull final ChangeMCP writeItem, + @Nullable final EntityAspect.EntitySystemAspect databaseAspect) { // Set the "last run id" to be the run id provided with the new system metadata. This will be // stored in index // for all aspects that have a run id, regardless of whether they change. - providedSystemMetadata.setLastRunId( - providedSystemMetadata.getRunId(GetMode.NULL), SetMode.IGNORE_NULL); + writeItem + .getSystemMetadata() + .setLastRunId(writeItem.getSystemMetadata().getRunId(GetMode.NULL), SetMode.IGNORE_NULL); // 2. Compare the latest existing and new. - final RecordTemplate oldValue = latest == null ? null : latest.getRecordTemplate(); + final EntityAspect.EntitySystemAspect previousBatchAspect = + (EntityAspect.EntitySystemAspect) writeItem.getPreviousSystemAspect(); + final RecordTemplate previousValue = + previousBatchAspect == null ? null : previousBatchAspect.getRecordTemplate(); // 3. 
If there is no difference between existing and new, we just update // the lastObserved in system metadata. RunId should stay as the original runId - if (oldValue != null && DataTemplateUtil.areEqual(oldValue, newValue)) { - SystemMetadata latestSystemMetadata = latest.getSystemMetadata(); - latestSystemMetadata.setLastObserved(providedSystemMetadata.getLastObserved()); + if (previousValue != null + && DataTemplateUtil.areEqual(previousValue, writeItem.getRecordTemplate())) { + + SystemMetadata latestSystemMetadata = previousBatchAspect.getSystemMetadata(); + latestSystemMetadata.setLastObserved(writeItem.getSystemMetadata().getLastObserved()); latestSystemMetadata.setLastRunId( - providedSystemMetadata.getLastRunId(GetMode.NULL), SetMode.IGNORE_NULL); + writeItem.getSystemMetadata().getLastRunId(GetMode.NULL), SetMode.IGNORE_NULL); - latest.getEntityAspect().setSystemMetadata(RecordUtils.toJsonString(latestSystemMetadata)); + previousBatchAspect + .getEntityAspect() + .setSystemMetadata(RecordUtils.toJsonString(latestSystemMetadata)); - log.info("Ingesting aspect with name {}, urn {}", aspectName, urn); - aspectDao.saveAspect(txContext, latest.getEntityAspect(), false); + log.info( + "Ingesting aspect with name {}, urn {}", + previousBatchAspect.getAspectName(), + previousBatchAspect.getUrn()); + aspectDao.saveAspect(txContext, previousBatchAspect.getEntityAspect(), false); // metrics aspectDao.incrementWriteMetrics( - aspectName, 1, latest.getMetadataRaw().getBytes(StandardCharsets.UTF_8).length); + previousBatchAspect.getAspectName(), + 1, + previousBatchAspect.getMetadataRaw().getBytes(StandardCharsets.UTF_8).length); return UpdateAspectResult.builder() - .urn(urn) - .oldValue(oldValue) - .newValue(oldValue) - .oldSystemMetadata(latest.getSystemMetadata()) + .urn(writeItem.getUrn()) + .oldValue(previousValue) + .newValue(previousValue) + .oldSystemMetadata(previousBatchAspect.getSystemMetadata()) .newSystemMetadata(latestSystemMetadata) 
.operation(MetadataAuditOperation.UPDATE) - .auditStamp(auditStamp) + .auditStamp(writeItem.getAuditStamp()) .maxVersion(0) .build(); } // 4. Save the newValue as the latest version - log.debug("Ingesting aspect with name {}, urn {}", aspectName, urn); - String newValueStr = EntityApiUtils.toJsonAspect(newValue); + log.debug( + "Ingesting aspect with name {}, urn {}", writeItem.getAspectName(), writeItem.getUrn()); + String newValueStr = EntityApiUtils.toJsonAspect(writeItem.getRecordTemplate()); long versionOfOld = aspectDao.saveLatestAspect( txContext, - urn.toString(), - aspectName, - latest == null ? null : EntityApiUtils.toJsonAspect(oldValue), - latest == null ? null : latest.getCreatedBy(), - latest == null ? null : latest.getEntityAspect().getCreatedFor(), - latest == null ? null : latest.getCreatedOn(), - latest == null ? null : latest.getSystemMetadataRaw(), + writeItem.getUrn().toString(), + writeItem.getAspectName(), + previousBatchAspect == null ? null : EntityApiUtils.toJsonAspect(previousValue), + previousBatchAspect == null ? null : previousBatchAspect.getCreatedBy(), + previousBatchAspect == null + ? null + : previousBatchAspect.getEntityAspect().getCreatedFor(), + previousBatchAspect == null ? null : previousBatchAspect.getCreatedOn(), + previousBatchAspect == null ? null : previousBatchAspect.getSystemMetadataRaw(), newValueStr, - auditStamp.getActor().toString(), - auditStamp.hasImpersonator() ? auditStamp.getImpersonator().toString() : null, - new Timestamp(auditStamp.getTime()), - EntityApiUtils.toJsonAspect(providedSystemMetadata), - nextVersion); + writeItem.getAuditStamp().getActor().toString(), + writeItem.getAuditStamp().hasImpersonator() + ? 
writeItem.getAuditStamp().getImpersonator().toString() + : null, + new Timestamp(writeItem.getAuditStamp().getTime()), + EntityApiUtils.toJsonAspect(writeItem.getSystemMetadata()), + writeItem.getNextAspectVersion()); // metrics aspectDao.incrementWriteMetrics( - aspectName, 1, newValueStr.getBytes(StandardCharsets.UTF_8).length); + writeItem.getAspectName(), 1, newValueStr.getBytes(StandardCharsets.UTF_8).length); return UpdateAspectResult.builder() - .urn(urn) - .oldValue(oldValue) - .newValue(newValue) - .oldSystemMetadata(latest == null ? null : latest.getSystemMetadata()) - .newSystemMetadata(providedSystemMetadata) + .urn(writeItem.getUrn()) + .oldValue(previousValue) + .newValue(writeItem.getRecordTemplate()) + .oldSystemMetadata( + previousBatchAspect == null ? null : previousBatchAspect.getSystemMetadata()) + .newSystemMetadata(writeItem.getSystemMetadata()) .operation(MetadataAuditOperation.UPDATE) - .auditStamp(auditStamp) + .auditStamp(writeItem.getAuditStamp()) .maxVersion(versionOfOld) .build(); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java index 61bba11098fae..35f133cc794f2 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java @@ -36,6 +36,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; @@ -425,7 +426,10 @@ public List getBrowsePaths( if (!sourceMap.containsKey(BROWSE_PATH)) { return Collections.emptyList(); } - return (List) sourceMap.get(BROWSE_PATH); + List browsePaths = + ((List) sourceMap.get(BROWSE_PATH)) + .stream().filter(Objects::nonNull).collect(Collectors.toList()); + return browsePaths; } public 
BrowseResultV2 browseV2( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/validation/PropertyDefinitionValidator.java b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/validation/PropertyDefinitionValidator.java index ae5472af622ad..6e047c12da9a9 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/validation/PropertyDefinitionValidator.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/validation/PropertyDefinitionValidator.java @@ -89,6 +89,9 @@ public static Stream validateDefinitionUpserts( item.getAspect(StructuredPropertyDefinition.class); versionFormatCheck(item, newDefinition.getVersion()).ifPresent(exceptions::addException); + urnIdCheck(item).ifPresent(exceptions::addException); + qualifiedNameCheck(item, newDefinition.getQualifiedName()) + .ifPresent(exceptions::addException); if (item.getPreviousSystemAspect() != null) { @@ -192,4 +195,20 @@ private static Optional versionFormatCheck( } return Optional.empty(); } + + private static Optional urnIdCheck(MCPItem item) { + if (item.getUrn().getId().contains(" ")) { + return Optional.of(AspectValidationException.forItem(item, "Urn ID cannot have spaces")); + } + return Optional.empty(); + } + + private static Optional qualifiedNameCheck( + MCPItem item, @Nonnull String qualifiedName) { + if (qualifiedName.contains(" ")) { + return Optional.of( + AspectValidationException.forItem(item, "Qualified names cannot have spaces")); + } + return Optional.empty(); + } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffectTest.java b/metadata-io/src/test/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffectTest.java index 1151014bf1162..976b165fea53d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffectTest.java +++ 
b/metadata-io/src/test/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffectTest.java @@ -17,6 +17,7 @@ import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.SystemAspect; import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.models.graph.RelatedEntities; import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult; @@ -34,6 +35,8 @@ import com.linkedin.metadata.utils.AuditStampUtils; import com.linkedin.test.metadata.aspect.TestEntityRegistry; import io.datahubproject.metadata.context.RetrieverContext; +import jakarta.json.JsonArray; +import jakarta.json.JsonObject; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -45,13 +48,7 @@ public class DataProductUnsetSideEffectTest { private static final EntityRegistry TEST_REGISTRY = new TestEntityRegistry(); private static final List SUPPORTED_CHANGE_TYPES = - List.of( - ChangeType.CREATE, - ChangeType.PATCH, - ChangeType.CREATE_ENTITY, - ChangeType.UPSERT, - ChangeType.DELETE, - ChangeType.RESTATE); + List.of(ChangeType.CREATE, ChangeType.CREATE_ENTITY, ChangeType.UPSERT, ChangeType.RESTATE); private static final Urn TEST_PRODUCT_URN = UrnUtils.getUrn("urn:li:dataProduct:someDataProductId"); @@ -251,6 +248,214 @@ public void testDPRemoveOld() { .build(mockAspectRetriever.getEntityRegistry()))); } + @Test + public void testBulkAssetMove() { + DataProductUnsetSideEffect test = new DataProductUnsetSideEffect(); + test.setConfig(TEST_PLUGIN_CONFIG); + + // Create 100 dataset URNs and set up their existing relationships + List datasetUrns = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + Urn datasetUrn = + UrnUtils.getUrn( + String.format("urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_%d,PROD)", i)); + datasetUrns.add(datasetUrn); + + // Mock the existing relationship for 
each dataset with the old data product + RelatedEntities relatedEntities = + new RelatedEntities( + "DataProductContains", + TEST_PRODUCT_URN_2.toString(), // Old data product + datasetUrn.toString(), + RelationshipDirection.INCOMING, + null); + + List relatedEntitiesList = new ArrayList<>(); + relatedEntitiesList.add(relatedEntities); + RelatedEntitiesScrollResult relatedEntitiesScrollResult = + new RelatedEntitiesScrollResult(1, 10, null, relatedEntitiesList); + + when(retrieverContext + .getGraphRetriever() + .scrollRelatedEntities( + eq(null), + eq(QueryUtils.newFilter("urn", datasetUrn.toString())), + eq(null), + eq(EMPTY_FILTER), + eq(ImmutableList.of("DataProductContains")), + eq( + QueryUtils.newRelationshipFilter( + EMPTY_FILTER, RelationshipDirection.INCOMING)), + eq(Collections.emptyList()), + eq(null), + eq(10), + eq(null), + eq(null))) + .thenReturn(relatedEntitiesScrollResult); + } + + // Create data product properties with all 100 assets + DataProductProperties dataProductProperties = new DataProductProperties(); + DataProductAssociationArray dataProductAssociations = new DataProductAssociationArray(); + for (Urn datasetUrn : datasetUrns) { + DataProductAssociation association = new DataProductAssociation(); + association.setDestinationUrn(datasetUrn); + dataProductAssociations.add(association); + } + dataProductProperties.setAssets(dataProductAssociations); + + // Run test + ChangeItemImpl dataProductPropertiesChangeItem = + ChangeItemImpl.builder() + .urn(TEST_PRODUCT_URN) // New data product + .aspectName(DATA_PRODUCT_PROPERTIES_ASPECT_NAME) + .changeType(ChangeType.UPSERT) + .entitySpec(TEST_REGISTRY.getEntitySpec(DATA_PRODUCT_ENTITY_NAME)) + .aspectSpec( + TEST_REGISTRY + .getEntitySpec(DATA_PRODUCT_ENTITY_NAME) + .getAspectSpec(DATA_PRODUCT_PROPERTIES_ASPECT_NAME)) + .recordTemplate(dataProductProperties) + .auditStamp(AuditStampUtils.createDefaultAuditStamp()) + .build(mockAspectRetriever); + + List testOutput = + test.postMCPSideEffect( + 
List.of( + MCLItemImpl.builder() + .build( + dataProductPropertiesChangeItem, + null, + null, + retrieverContext.getAspectRetriever())), + retrieverContext) + .toList(); + + // Verify test + assertEquals(testOutput.size(), 1, "Expected one patch to remove assets from old data product"); + + MCPItem patchItem = testOutput.get(0); + assertEquals( + patchItem.getUrn(), TEST_PRODUCT_URN_2, "Patch should target the old data product"); + assertEquals(patchItem.getAspectName(), DATA_PRODUCT_PROPERTIES_ASPECT_NAME); + + // Verify the patch contains remove operations for all 100 assets + JsonArray patchArray = ((PatchItemImpl) patchItem).getPatch().toJsonArray(); + assertEquals(patchArray.size(), 100, "Should have 100 remove operations"); + + // Verify each remove operation + for (int i = 0; i < 100; i++) { + JsonObject op = patchArray.getJsonObject(i); + assertEquals(op.getString("op"), PatchOperationType.REMOVE.getValue()); + assertEquals( + op.getString("path"), + String.format("/assets/urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_%d,PROD)", i)); + } + } + + @Test + public void testUpsertWithPreviousAspect() { + DataProductUnsetSideEffect test = new DataProductUnsetSideEffect(); + test.setConfig(TEST_PLUGIN_CONFIG); + + // Case 1: UPSERT with new additions + DataProductProperties previousProperties = new DataProductProperties(); + DataProductAssociationArray previousAssociations = new DataProductAssociationArray(); + DataProductAssociation previousAssociation = new DataProductAssociation(); + previousAssociation.setDestinationUrn(DATASET_URN_1); + previousAssociations.add(previousAssociation); + previousProperties.setAssets(previousAssociations); + + // New properties include both old and new datasets + DataProductProperties newProperties = new DataProductProperties(); + DataProductAssociationArray newAssociations = new DataProductAssociationArray(); + DataProductAssociation association1 = new DataProductAssociation(); + 
association1.setDestinationUrn(DATASET_URN_1); + DataProductAssociation association2 = new DataProductAssociation(); + association2.setDestinationUrn(DATASET_URN_2); + newAssociations.add(association1); + newAssociations.add(association2); + newProperties.setAssets(newAssociations); + + // Create change item with previous aspect + SystemAspect prevData = mock(SystemAspect.class); + when(prevData.getRecordTemplate()).thenReturn(previousProperties); + + ChangeItemImpl dataProductPropertiesChangeItem = + ChangeItemImpl.builder() + .urn(TEST_PRODUCT_URN) + .aspectName(DATA_PRODUCT_PROPERTIES_ASPECT_NAME) + .changeType(ChangeType.UPSERT) + .entitySpec(TEST_REGISTRY.getEntitySpec(DATA_PRODUCT_ENTITY_NAME)) + .aspectSpec( + TEST_REGISTRY + .getEntitySpec(DATA_PRODUCT_ENTITY_NAME) + .getAspectSpec(DATA_PRODUCT_PROPERTIES_ASPECT_NAME)) + .recordTemplate(newProperties) + .previousSystemAspect(prevData) + .auditStamp(AuditStampUtils.createDefaultAuditStamp()) + .build(mockAspectRetriever); + + List testOutput = + test.postMCPSideEffect( + List.of( + MCLItemImpl.builder() + .build( + dataProductPropertiesChangeItem, + null, + null, + retrieverContext.getAspectRetriever())), + retrieverContext) + .toList(); + + // Verify that only one patch is generated for the new dataset + assertEquals( + testOutput.size(), 1, "Expected removal of previous data product for new dataset only"); + MCPItem patchItem = testOutput.get(0); + assertEquals( + patchItem.getUrn(), TEST_PRODUCT_URN_2, "Patch should target the old data product"); + GenericJsonPatch.PatchOp expectedPatchOp = new GenericJsonPatch.PatchOp(); + expectedPatchOp.setOp(PatchOperationType.REMOVE.getValue()); + expectedPatchOp.setPath(String.format("/assets/%s", DATASET_URN_2)); + + // Case 2: UPSERT with no new additions + DataProductProperties sameProperties = new DataProductProperties(); + DataProductAssociationArray sameAssociations = new DataProductAssociationArray(); + DataProductAssociation sameAssociation = new 
DataProductAssociation(); + sameAssociation.setDestinationUrn(DATASET_URN_1); + sameAssociations.add(sameAssociation); + sameProperties.setAssets(sameAssociations); + + SystemAspect prevSameData = mock(SystemAspect.class); + when(prevSameData.getRecordTemplate()).thenReturn(sameProperties); + + ChangeItemImpl noChangeItem = + ChangeItemImpl.builder() + .urn(TEST_PRODUCT_URN) + .aspectName(DATA_PRODUCT_PROPERTIES_ASPECT_NAME) + .changeType(ChangeType.UPSERT) + .entitySpec(TEST_REGISTRY.getEntitySpec(DATA_PRODUCT_ENTITY_NAME)) + .aspectSpec( + TEST_REGISTRY + .getEntitySpec(DATA_PRODUCT_ENTITY_NAME) + .getAspectSpec(DATA_PRODUCT_PROPERTIES_ASPECT_NAME)) + .recordTemplate(sameProperties) + .previousSystemAspect(prevSameData) + .auditStamp(AuditStampUtils.createDefaultAuditStamp()) + .build(mockAspectRetriever); + + List noChangeOutput = + test.postMCPSideEffect( + List.of( + MCLItemImpl.builder() + .build(noChangeItem, null, null, retrieverContext.getAspectRetriever())), + retrieverContext) + .toList(); + + // Verify no patches are generated when there are no new additions + assertEquals(noChangeOutput.size(), 0, "Expected no changes when assets are the same"); + } + private static DataProductProperties getTestDataProductProperties(Urn destinationUrn) { DataProductProperties dataProductProperties = new DataProductProperties(); DataProductAssociationArray dataProductAssociations = new DataProductAssociationArray(); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java index 04c9297b1ed7a..f2ed2fddba765 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java @@ -1,6 +1,8 @@ package com.linkedin.metadata.entity; import static com.linkedin.metadata.Constants.CORP_USER_ENTITY_NAME; +import static
com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; +import static com.linkedin.metadata.Constants.GLOBAL_TAGS_ASPECT_NAME; import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; import static org.mockito.Mockito.mock; import static org.testng.Assert.assertEquals; @@ -8,7 +10,11 @@ import static org.testng.Assert.assertTrue; import com.linkedin.common.AuditStamp; +import com.linkedin.common.GlobalTags; import com.linkedin.common.Status; +import com.linkedin.common.TagAssociation; +import com.linkedin.common.TagAssociationArray; +import com.linkedin.common.urn.TagUrn; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.DataTemplateUtil; @@ -18,17 +24,21 @@ import com.linkedin.metadata.AspectGenerationUtils; import com.linkedin.metadata.Constants; import com.linkedin.metadata.EbeanTestUtils; +import com.linkedin.metadata.aspect.patch.GenericJsonPatch; +import com.linkedin.metadata.aspect.patch.PatchOperationType; import com.linkedin.metadata.config.EbeanConfiguration; import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.entity.ebean.EbeanAspectDao; import com.linkedin.metadata.entity.ebean.EbeanRetentionService; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; +import com.linkedin.metadata.entity.ebean.batch.PatchItemImpl; import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.key.CorpUserKey; import com.linkedin.metadata.models.registry.EntityRegistryException; import com.linkedin.metadata.query.ListUrnsResult; import com.linkedin.metadata.service.UpdateIndicesService; +import com.linkedin.metadata.utils.AuditStampUtils; import com.linkedin.metadata.utils.PegasusUtils; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.mxe.SystemMetadata; @@ -433,6 +443,220 @@ public void testBatchDuplicate() throws Exception { "Expected 2nd item to be the 
latest"); } + @Test + public void testBatchPatchWithTrailingNoOp() throws Exception { + Urn entityUrn = + UrnUtils.getUrn( + "urn:li:dataset:(urn:li:dataPlatform:snowflake,testBatchPatchWithTrailingNoOp,PROD)"); + TagUrn tag1 = TagUrn.createFromString("urn:li:tag:tag1"); + Urn tag2 = UrnUtils.getUrn("urn:li:tag:tag2"); + Urn tagOther = UrnUtils.getUrn("urn:li:tag:other"); + + SystemMetadata systemMetadata = AspectGenerationUtils.createSystemMetadata(); + + ChangeItemImpl initialAspectTag1 = + ChangeItemImpl.builder() + .urn(entityUrn) + .aspectName(GLOBAL_TAGS_ASPECT_NAME) + .recordTemplate( + new GlobalTags() + .setTags(new TagAssociationArray(new TagAssociation().setTag(tag1)))) + .systemMetadata(systemMetadata.copy()) + .auditStamp(TEST_AUDIT_STAMP) + .build(TestOperationContexts.emptyAspectRetriever(null)); + + PatchItemImpl patchAdd2 = + PatchItemImpl.builder() + .urn(entityUrn) + .entitySpec(_testEntityRegistry.getEntitySpec(DATASET_ENTITY_NAME)) + .aspectName(GLOBAL_TAGS_ASPECT_NAME) + .aspectSpec( + _testEntityRegistry + .getEntitySpec(DATASET_ENTITY_NAME) + .getAspectSpec(GLOBAL_TAGS_ASPECT_NAME)) + .patch( + GenericJsonPatch.builder() + .arrayPrimaryKeys(Map.of("properties", List.of("tag"))) + .patch(List.of(tagPatchOp(PatchOperationType.ADD, tag2))) + .build() + .getJsonPatch()) + .auditStamp(AuditStampUtils.createDefaultAuditStamp()) + .build(_testEntityRegistry); + + PatchItemImpl patchRemoveNonExistent = + PatchItemImpl.builder() + .urn(entityUrn) + .entitySpec(_testEntityRegistry.getEntitySpec(DATASET_ENTITY_NAME)) + .aspectName(GLOBAL_TAGS_ASPECT_NAME) + .aspectSpec( + _testEntityRegistry + .getEntitySpec(DATASET_ENTITY_NAME) + .getAspectSpec(GLOBAL_TAGS_ASPECT_NAME)) + .patch( + GenericJsonPatch.builder() + .arrayPrimaryKeys(Map.of("properties", List.of("tag"))) + .patch(List.of(tagPatchOp(PatchOperationType.REMOVE, tagOther))) + .build() + .getJsonPatch()) + .auditStamp(AuditStampUtils.createDefaultAuditStamp()) + .build(_testEntityRegistry); + + 
// establish base entity + _entityServiceImpl.ingestAspects( + opContext, + AspectsBatchImpl.builder() + .retrieverContext(opContext.getRetrieverContext().get()) + .items(List.of(initialAspectTag1)) + .build(), + false, + true); + + _entityServiceImpl.ingestAspects( + opContext, + AspectsBatchImpl.builder() + .retrieverContext(opContext.getRetrieverContext().get()) + .items(List.of(patchAdd2, patchRemoveNonExistent)) + .build(), + false, + true); + + // List aspects urns + ListUrnsResult batch = _entityServiceImpl.listUrns(opContext, entityUrn.getEntityType(), 0, 1); + + assertEquals(batch.getStart().intValue(), 0); + assertEquals(batch.getCount().intValue(), 1); + assertEquals(batch.getTotal().intValue(), 1); + assertEquals(batch.getEntities().size(), 1); + assertEquals(entityUrn.toString(), batch.getEntities().get(0).toString()); + + EnvelopedAspect envelopedAspect = + _entityServiceImpl.getLatestEnvelopedAspect( + opContext, DATASET_ENTITY_NAME, entityUrn, GLOBAL_TAGS_ASPECT_NAME); + assertEquals( + envelopedAspect.getSystemMetadata().getVersion(), + "2", + "Expected version 2. 
1 - Initial, + 1 batch operation (1 add, 1 remove)"); + assertEquals( + new GlobalTags(envelopedAspect.getValue().data()) + .getTags().stream().map(TagAssociation::getTag).collect(Collectors.toSet()), + Set.of(tag1, tag2), + "Expected both tags"); + } + + @Test + public void testBatchPatchAdd() throws Exception { + Urn entityUrn = + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:snowflake,testBatchPatchAdd,PROD)"); + TagUrn tag1 = TagUrn.createFromString("urn:li:tag:tag1"); + TagUrn tag2 = TagUrn.createFromString("urn:li:tag:tag2"); + TagUrn tag3 = TagUrn.createFromString("urn:li:tag:tag3"); + + SystemMetadata systemMetadata = AspectGenerationUtils.createSystemMetadata(); + + ChangeItemImpl initialAspectTag1 = + ChangeItemImpl.builder() + .urn(entityUrn) + .aspectName(GLOBAL_TAGS_ASPECT_NAME) + .recordTemplate( + new GlobalTags() + .setTags(new TagAssociationArray(new TagAssociation().setTag(tag1)))) + .systemMetadata(systemMetadata.copy()) + .auditStamp(TEST_AUDIT_STAMP) + .build(TestOperationContexts.emptyAspectRetriever(null)); + + PatchItemImpl patchAdd3 = + PatchItemImpl.builder() + .urn(entityUrn) + .entitySpec(_testEntityRegistry.getEntitySpec(DATASET_ENTITY_NAME)) + .aspectName(GLOBAL_TAGS_ASPECT_NAME) + .aspectSpec( + _testEntityRegistry + .getEntitySpec(DATASET_ENTITY_NAME) + .getAspectSpec(GLOBAL_TAGS_ASPECT_NAME)) + .patch( + GenericJsonPatch.builder() + .arrayPrimaryKeys(Map.of("properties", List.of("tag"))) + .patch(List.of(tagPatchOp(PatchOperationType.ADD, tag3))) + .build() + .getJsonPatch()) + .auditStamp(AuditStampUtils.createDefaultAuditStamp()) + .build(_testEntityRegistry); + + PatchItemImpl patchAdd2 = + PatchItemImpl.builder() + .urn(entityUrn) + .entitySpec(_testEntityRegistry.getEntitySpec(DATASET_ENTITY_NAME)) + .aspectName(GLOBAL_TAGS_ASPECT_NAME) + .aspectSpec( + _testEntityRegistry + .getEntitySpec(DATASET_ENTITY_NAME) + .getAspectSpec(GLOBAL_TAGS_ASPECT_NAME)) + .patch( + GenericJsonPatch.builder() + 
.arrayPrimaryKeys(Map.of("properties", List.of("tag"))) + .patch(List.of(tagPatchOp(PatchOperationType.ADD, tag2))) + .build() + .getJsonPatch()) + .auditStamp(AuditStampUtils.createDefaultAuditStamp()) + .build(_testEntityRegistry); + + PatchItemImpl patchAdd1 = + PatchItemImpl.builder() + .urn(entityUrn) + .entitySpec(_testEntityRegistry.getEntitySpec(DATASET_ENTITY_NAME)) + .aspectName(GLOBAL_TAGS_ASPECT_NAME) + .aspectSpec( + _testEntityRegistry + .getEntitySpec(DATASET_ENTITY_NAME) + .getAspectSpec(GLOBAL_TAGS_ASPECT_NAME)) + .patch( + GenericJsonPatch.builder() + .arrayPrimaryKeys(Map.of("properties", List.of("tag"))) + .patch(List.of(tagPatchOp(PatchOperationType.ADD, tag1))) + .build() + .getJsonPatch()) + .auditStamp(AuditStampUtils.createDefaultAuditStamp()) + .build(_testEntityRegistry); + + // establish base entity + _entityServiceImpl.ingestAspects( + opContext, + AspectsBatchImpl.builder() + .retrieverContext(opContext.getRetrieverContext().get()) + .items(List.of(initialAspectTag1)) + .build(), + false, + true); + + _entityServiceImpl.ingestAspects( + opContext, + AspectsBatchImpl.builder() + .retrieverContext(opContext.getRetrieverContext().get()) + .items(List.of(patchAdd3, patchAdd2, patchAdd1)) + .build(), + false, + true); + + // List aspects urns + ListUrnsResult batch = _entityServiceImpl.listUrns(opContext, entityUrn.getEntityType(), 0, 1); + + assertEquals(batch.getStart().intValue(), 0); + assertEquals(batch.getCount().intValue(), 1); + assertEquals(batch.getTotal().intValue(), 1); + assertEquals(batch.getEntities().size(), 1); + assertEquals(entityUrn.toString(), batch.getEntities().get(0).toString()); + + EnvelopedAspect envelopedAspect = + _entityServiceImpl.getLatestEnvelopedAspect( + opContext, DATASET_ENTITY_NAME, entityUrn, GLOBAL_TAGS_ASPECT_NAME); + assertEquals(envelopedAspect.getSystemMetadata().getVersion(), "3", "Expected version 3"); + assertEquals( + new GlobalTags(envelopedAspect.getValue().data()) + 
.getTags().stream().map(TagAssociation::getTag).collect(Collectors.toSet()), + Set.of(tag1, tag2, tag3), + "Expected all tags"); + } + @Test public void dataGeneratorThreadingTest() { DataGenerator dataGenerator = new DataGenerator(opContext, _entityServiceImpl); @@ -659,4 +883,14 @@ public void run() { } } } + + private static GenericJsonPatch.PatchOp tagPatchOp(PatchOperationType op, Urn tagUrn) { + GenericJsonPatch.PatchOp patchOp = new GenericJsonPatch.PatchOp(); + patchOp.setOp(op.getValue()); + patchOp.setPath(String.format("/tags/%s", tagUrn)); + if (PatchOperationType.ADD.equals(op)) { + patchOp.setValue(Map.of("tag", tagUrn.toString())); + } + return patchOp; + } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java index 53f5ebfe59728..654c448fdec94 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java @@ -50,10 +50,8 @@ import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.key.CorpUserKey; import com.linkedin.metadata.models.AspectSpec; -import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistryException; -import com.linkedin.metadata.models.registry.MergedEntityRegistry; import com.linkedin.metadata.run.AspectRowSummary; import com.linkedin.metadata.service.UpdateIndicesService; import com.linkedin.metadata.snapshot.CorpUserSnapshot; @@ -75,6 +73,7 @@ import com.linkedin.structured.StructuredPropertyValueAssignmentArray; import com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.test.metadata.context.TestOperationContexts; import jakarta.annotation.Nonnull; import java.util.ArrayList; import java.util.Arrays; 
@@ -113,18 +112,13 @@ public abstract class EntityServiceTest browsePaths = browseDAO.getBrowsePaths(opContext, "dataset", dummyUrn); assertEquals(browsePaths.size(), 1); assertEquals(browsePaths.get(0), "foo"); + + // Test the case of null browsePaths field + sourceMap.put("browsePaths", Collections.singletonList(null)); + when(mockSearchHit.getSourceAsMap()).thenReturn(sourceMap); + when(mockSearchHits.getHits()).thenReturn(new SearchHit[] {mockSearchHit}); + when(mockSearchResponse.getHits()).thenReturn(mockSearchHits); + when(mockClient.search(any(), eq(RequestOptions.DEFAULT))).thenReturn(mockSearchResponse); + List nullBrowsePaths = browseDAO.getBrowsePaths(opContext, "dataset", dummyUrn); + assertEquals(nullBrowsePaths.size(), 0); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/PropertyDefinitionValidatorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/PropertyDefinitionValidatorTest.java index 2af731a51145e..18949f0566dd1 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/PropertyDefinitionValidatorTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/PropertyDefinitionValidatorTest.java @@ -397,4 +397,40 @@ public void testCanChangeAllowedValueDescriptions() .count(), 0); } + + @Test + public void testUrnIdWithSpace() + throws URISyntaxException, CloneNotSupportedException, AspectValidationException { + Urn propertyUrn = UrnUtils.getUrn("urn:li:structuredProperty:test me out.foo.bar"); + StructuredPropertyDefinition newProperty = new StructuredPropertyDefinition(); + newProperty.setEntityTypes(new UrnArray(Urn.createFromString("urn:li:logicalEntity:dataset"))); + newProperty.setDisplayName("oldProp"); + newProperty.setQualifiedName("foo.bar"); + newProperty.setCardinality(PropertyCardinality.MULTIPLE); + newProperty.setValueType(Urn.createFromString("urn:li:logicalType:STRING")); + 
assertEquals( + PropertyDefinitionValidator.validateDefinitionUpserts( + TestMCP.ofOneMCP(propertyUrn, null, newProperty, entityRegistry), + mockRetrieverContext) + .count(), + 1); + } + + @Test + public void testQualifiedNameWithSpace() + throws URISyntaxException, CloneNotSupportedException, AspectValidationException { + Urn propertyUrn = UrnUtils.getUrn("urn:li:structuredProperty:foo.bar"); + StructuredPropertyDefinition newProperty = new StructuredPropertyDefinition(); + newProperty.setEntityTypes(new UrnArray(Urn.createFromString("urn:li:logicalEntity:dataset"))); + newProperty.setDisplayName("oldProp"); + newProperty.setQualifiedName("foo.bar with spaces"); + newProperty.setCardinality(PropertyCardinality.MULTIPLE); + newProperty.setValueType(Urn.createFromString("urn:li:logicalType:STRING")); + assertEquals( + PropertyDefinitionValidator.validateDefinitionUpserts( + TestMCP.ofOneMCP(propertyUrn, null, newProperty, entityRegistry), + mockRetrieverContext) + .count(), + 1); + } } diff --git a/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl b/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl index 0b72d376b0be4..61731e8d37fd6 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl @@ -16,7 +16,7 @@ record SchemaField { @Searchable = { "fieldName": "fieldPaths", "fieldType": "TEXT", - "boostScore": 5.0, + "boostScore": 1.0, "queryByDefault": "true" } fieldPath: SchemaFieldPath diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java index b2db0857a6a5c..26e0da8e6fb99 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java +++ 
b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java @@ -98,8 +98,7 @@ public MCPSideEffect dataProductUnsetSideEffect() { AspectPluginConfig.builder() .enabled(true) .className(DataProductUnsetSideEffect.class.getName()) - .supportedOperations( - List.of("CREATE", "CREATE_ENTITY", "UPSERT", "RESTATE", "DELETE", "PATCH")) + .supportedOperations(List.of("CREATE", "CREATE_ENTITY", "UPSERT", "RESTATE")) .supportedEntityAspectNames( List.of( AspectPluginConfig.EntityAspectName.builder() diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/analytics/Analytics.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/analytics/Analytics.java index 9bbe1bb35fc65..94da6308eda1f 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/analytics/Analytics.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/analytics/Analytics.java @@ -5,7 +5,6 @@ import com.datahub.authorization.AuthUtil; import com.datahub.plugins.auth.authorization.Authorizer; import com.linkedin.analytics.GetTimeseriesAggregatedStatsResponse; -import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.resources.restli.RestliUtils; import com.linkedin.metadata.timeseries.TimeseriesAspectService; @@ -14,7 +13,6 @@ import com.linkedin.restli.server.RestLiServiceException; import com.linkedin.restli.server.annotations.Action; import com.linkedin.restli.server.annotations.ActionParam; -import com.linkedin.restli.server.annotations.Context; import com.linkedin.restli.server.annotations.Optional; import com.linkedin.restli.server.annotations.RestLiSimpleResource; import com.linkedin.restli.server.resources.SimpleResourceTemplate; @@ -24,12 +22,10 @@ import com.linkedin.timeseries.GroupingBucket; import 
com.linkedin.timeseries.GroupingBucketArray; import java.util.Arrays; -import java.util.List; import javax.annotation.Nonnull; import javax.annotation.Nullable; import javax.inject.Inject; import javax.inject.Named; -import javax.servlet.http.HttpServletRequest; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.RequestContext; @@ -38,6 +34,7 @@ import static com.datahub.authorization.AuthUtil.isAPIAuthorized; import static com.linkedin.metadata.authorization.ApiGroup.TIMESERIES; import static com.linkedin.metadata.authorization.ApiOperation.READ; +import static com.linkedin.metadata.utils.CriterionUtils.validateAndConvert; /** Rest.li entry point: /analytics */ @Slf4j @@ -90,8 +87,9 @@ public Task getTimeseriesStats( resp.setEntityName(entityName); resp.setAspectName(aspectName); resp.setAggregationSpecs(new AggregationSpecArray(Arrays.asList(aggregationSpecs))); - if (filter != null) { - resp.setFilter(filter); + final Filter finalFilter = validateAndConvert(filter); + if (finalFilter != null) { + resp.setFilter(finalFilter); } if (groupingBuckets != null) { resp.setGroupingBuckets(new GroupingBucketArray(Arrays.asList(groupingBuckets))); @@ -99,7 +97,7 @@ public Task getTimeseriesStats( GenericTable aggregatedStatsTable = timeseriesAspectService.getAggregatedStats(opContext, - entityName, aspectName, aggregationSpecs, filter, groupingBuckets); + entityName, aspectName, aggregationSpecs, finalFilter, groupingBuckets); resp.setTable(aggregatedStatsTable); return resp; }); diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java index 37dca1cecd817..a8b9c34ab66ae 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java +++ 
b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java @@ -11,6 +11,7 @@ import static com.linkedin.metadata.authorization.ApiOperation.READ; import static com.linkedin.metadata.resources.operations.OperationsResource.*; import static com.linkedin.metadata.resources.restli.RestliConstants.*; +import static com.linkedin.metadata.utils.CriterionUtils.validateAndConvert; import com.codahale.metrics.MetricRegistry; import com.datahub.authentication.Authentication; @@ -22,14 +23,12 @@ import com.linkedin.common.urn.Urn; import com.linkedin.metadata.aspect.EnvelopedAspectArray; import com.linkedin.metadata.aspect.VersionedAspect; -import com.linkedin.metadata.aspect.batch.BatchItem; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.IngestResult; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.entity.validation.ValidationException; -import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.resources.operations.Utils; @@ -38,7 +37,6 @@ import com.linkedin.metadata.timeseries.TimeseriesAspectService; import com.linkedin.mxe.GenericAspect; import com.linkedin.mxe.MetadataChangeProposal; -import com.linkedin.mxe.SystemMetadata; import com.linkedin.parseq.Task; import com.linkedin.restli.common.HttpStatus; import com.linkedin.restli.internal.server.methods.AnyRecord; @@ -59,8 +57,6 @@ import java.time.Clock; import java.util.Arrays; import java.util.List; -import java.util.Map; -import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; import javax.annotation.Nonnull; @@ -239,7 +235,7 @@ public Task getTimeseriesAspectValues( startTimeMillis, endTimeMillis, limit, 
- filter, + validateAndConvert(filter), sort))); return response; }, diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java index 30aa3ffa578c1..6c5576f2e5d9f 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java @@ -12,6 +12,7 @@ import static com.linkedin.metadata.entity.validation.ValidationUtils.*; import static com.linkedin.metadata.resources.restli.RestliConstants.*; import static com.linkedin.metadata.search.utils.SearchUtils.*; +import static com.linkedin.metadata.utils.CriterionUtils.validateAndConvert; import static com.linkedin.metadata.utils.PegasusUtils.*; import static com.linkedin.metadata.utils.SystemMetadataUtils.generateSystemMetadataIfEmpty; @@ -401,7 +402,7 @@ public Task search( // This API is not used by the frontend for search bars so we default to structured result = entitySearchService.search(opContext, - List.of(entityName), input, filter, sortCriterionList, start, count); + List.of(entityName), input, validateAndConvert(filter), sortCriterionList, start, count); if (!isAPIAuthorizedResult( opContext, @@ -448,7 +449,7 @@ public Task searchAcrossEntities( log.info("GET SEARCH RESULTS ACROSS ENTITIES for {} with query {}", entityList, input); return RestliUtils.toTask( () -> { - SearchResult result = searchService.searchAcrossEntities(opContext, entityList, input, filter, sortCriterionList, start, count); + SearchResult result = searchService.searchAcrossEntities(opContext, entityList, input, validateAndConvert(filter), sortCriterionList, start, count); if (!isAPIAuthorizedResult( opContext, result)) { @@ -514,7 +515,7 @@ public Task scrollAcrossEntities( opContext, entityList, 
input, - filter, + validateAndConvert(filter), sortCriterionList, scrollId, keepAlive, @@ -583,7 +584,7 @@ public Task searchAcrossLineage( entityList, input, maxHops, - filter, + validateAndConvert(filter), sortCriterionList, start, count), @@ -648,7 +649,7 @@ public Task scrollAcrossLineage( entityList, input, maxHops, - filter, + validateAndConvert(filter), sortCriterionList, scrollId, keepAlive, @@ -683,10 +684,11 @@ public Task list( List sortCriterionList = getSortCriteria(sortCriteria, sortCriterion); - log.info("GET LIST RESULTS for {} with filter {}", entityName, filter); + final Filter finalFilter = validateAndConvert(filter); + log.info("GET LIST RESULTS for {} with filter {}", entityName, finalFilter); return RestliUtils.toTask( () -> { - SearchResult result = entitySearchService.filter(opContext, entityName, filter, sortCriterionList, start, count); + SearchResult result = entitySearchService.filter(opContext, entityName, finalFilter, sortCriterionList, start, count); if (!AuthUtil.isAPIAuthorizedResult( opContext, result)) { diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java index beb8bd3d090a5..445724f0144e6 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java @@ -363,7 +363,9 @@ List ingestAspects( * @param auditStamp an {@link AuditStamp} containing metadata about the writer & current time * @param systemMetadata * @return the {@link RecordTemplate} representation of the written aspect object + * @deprecated See Conditional Write ChangeType CREATE */ + @Deprecated RecordTemplate ingestAspectIfNotPresent( @Nonnull OperationContext opContext, @Nonnull Urn urn, diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/utils/CriterionUtils.java 
b/metadata-utils/src/main/java/com/linkedin/metadata/utils/CriterionUtils.java index e40c4af1e0ae7..f8e138487fc16 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/utils/CriterionUtils.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/utils/CriterionUtils.java @@ -1,17 +1,81 @@ package com.linkedin.metadata.utils; +import static com.linkedin.metadata.Constants.URN_LI_PREFIX; + import com.linkedin.data.template.StringArray; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.Filter; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.List; import java.util.stream.Collectors; import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.extern.slf4j.Slf4j; +@Slf4j public class CriterionUtils { private CriterionUtils() {} + /** + * This function is meant to validate and correct Filter input for rest.li endpoints. 
+ * + * @param inputFilter the rest.li filter parameter + * @return validated and corrected filter + */ + @Nullable + public static Filter validateAndConvert(@Nullable Filter inputFilter) { + if (inputFilter != null) { + List invalidCriterion = new ArrayList<>(); + if (inputFilter.hasCriteria()) { + invalidCriterion.addAll( + inputFilter.getCriteria().stream() + .filter( + criterion -> + (criterion.hasValue() && !criterion.getValue().isEmpty()) + || !criterion.hasValue()) + .collect(Collectors.toList())); + } + if (inputFilter.hasOr()) { + invalidCriterion.addAll( + inputFilter.getOr().stream() + .flatMap(c -> c.getAnd().stream()) + .filter( + criterion -> + (criterion.hasValue() && !criterion.getValue().isEmpty()) + || !criterion.hasValue()) + .collect(Collectors.toList())); + } + + for (Criterion criterion : invalidCriterion) { + if (criterion.hasValue()) { + if ((criterion.getValue().contains(",") + && !criterion.getValue().startsWith(URN_LI_PREFIX)) + || criterion.getValue().contains(")," + URN_LI_PREFIX)) { + throw new IllegalArgumentException( + "Criterion `value` is deprecated and contains an ambiguous comma. Please use `values`."); + } + if (criterion.hasValues() && !criterion.getValue().equals(criterion.getValues().get(0))) { + throw new IllegalArgumentException( + "Criterion `value` is deprecated and `values`[0] is populated with a conflicting value."); + } + // auto-convert + if (!criterion.hasValues()) { + log.error( + "Deprecated use of a filter using Criterion's `value` has been detected and corrected. 
Please migrate to `values` instead."); + criterion.setValues(new StringArray(criterion.getValue())); + } + } + // must be set per required field + criterion.setValue(""); + } + } + return inputFilter; + } + public static Criterion buildExistsCriterion(@Nonnull String field) { return buildCriterion(field, Condition.EXISTS, false, Collections.emptyList()); } diff --git a/metadata-utils/src/test/java/com/linkedin/metadata/utils/CriterionUtilsTest.java b/metadata-utils/src/test/java/com/linkedin/metadata/utils/CriterionUtilsTest.java new file mode 100644 index 0000000000000..e2f22dd665c7c --- /dev/null +++ b/metadata-utils/src/test/java/com/linkedin/metadata/utils/CriterionUtilsTest.java @@ -0,0 +1,274 @@ +package com.linkedin.metadata.utils; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertNull; +import static org.testng.Assert.assertTrue; + +import com.linkedin.data.template.StringArray; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.CriterionArray; +import com.linkedin.metadata.query.filter.Filter; +import org.testng.annotations.Test; + +public class CriterionUtilsTest { + @Test + public void testNullFilter() { + Filter result = CriterionUtils.validateAndConvert(null); + assertNull(result); + } + + @Test + public void testEmptyFilter() { + Filter input = new Filter(); + Filter result = CriterionUtils.validateAndConvert(input); + assertNotNull(result); + assertFalse(result.hasCriteria()); + assertFalse(result.hasOr()); + } + + @Test + public void testSimpleCriterionConversion() { + Filter input = new Filter(); + Criterion criterion = new Criterion(); + criterion.setValue("testValue"); + input.setCriteria(new CriterionArray(criterion)); + + Filter result = 
CriterionUtils.validateAndConvert(input); + + Criterion convertedCriterion = result.getCriteria().get(0); + assertEquals(convertedCriterion.getValue(), ""); + assertTrue(convertedCriterion.hasValues()); + assertEquals("testValue", convertedCriterion.getValues().get(0)); + } + + @Test + public void testOrClauseCriterionConversion() { + Filter input = new Filter(); + + // Create OR clause with AND criteria + Criterion criterion = new Criterion(); + criterion.setValue("orValue"); + + ConjunctiveCriterion conjunctive = new ConjunctiveCriterion(); + conjunctive.setAnd(new CriterionArray(criterion)); + + input.setOr(new ConjunctiveCriterionArray(conjunctive)); + + Filter result = CriterionUtils.validateAndConvert(input); + + Criterion convertedCriterion = result.getOr().get(0).getAnd().get(0); + assertEquals(convertedCriterion.getValue(), ""); + assertTrue(convertedCriterion.hasValues()); + assertEquals("orValue", convertedCriterion.getValues().get(0)); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testCommaInValueThrowsException() { + Filter input = new Filter(); + Criterion criterion = new Criterion(); + criterion.setValue("value1,value2"); + input.setCriteria(new CriterionArray(criterion)); + + CriterionUtils.validateAndConvert(input); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testConflictingValuesThrowsException() { + Filter input = new Filter(); + Criterion criterion = new Criterion(); + criterion.setValue("value1"); + criterion.setValues(new StringArray("differentValue")); + input.setCriteria(new CriterionArray(criterion)); + + CriterionUtils.validateAndConvert(input); + } + + @Test + public void testExistingValuesNotModified() { + Filter input = new Filter(); + Criterion criterion = new Criterion(); + criterion.setValue("value1"); + criterion.setValues(new StringArray("value1")); // Same value, should not throw exception + input.setCriteria(new CriterionArray(criterion)); + + Filter result = 
CriterionUtils.validateAndConvert(input); + + Criterion convertedCriterion = result.getCriteria().get(0); + assertEquals(convertedCriterion.getValue(), ""); + assertTrue(convertedCriterion.hasValues()); + assertEquals("value1", convertedCriterion.getValues().get(0)); + } + + @Test + public void testMultipleCriteriaConversion() { + Filter input = new Filter(); + + Criterion criterion1 = new Criterion(); + criterion1.setValue("value1"); + + Criterion criterion2 = new Criterion(); + criterion2.setValue("value2"); + + input.setCriteria(new CriterionArray(criterion1, criterion2)); + + Filter result = CriterionUtils.validateAndConvert(input); + + assertEquals(2, result.getCriteria().size()); + + for (Criterion c : result.getCriteria()) { + assertEquals(c.getValue(), ""); + assertTrue(c.hasValues()); + assertTrue(c.getValues().get(0).equals("value1") || c.getValues().get(0).equals("value2")); + } + } + + @Test + public void testMixedCriteriaAndOrClause() { + Filter input = new Filter(); + + // Add direct criteria + Criterion criterion1 = new Criterion(); + criterion1.setValue("directValue"); + input.setCriteria(new CriterionArray(criterion1)); + + // Add OR clause with AND criteria + Criterion criterion2 = new Criterion(); + criterion2.setValue("orValue"); + ConjunctiveCriterion conjunctive = new ConjunctiveCriterion(); + conjunctive.setAnd(new CriterionArray(criterion2)); + input.setOr(new ConjunctiveCriterionArray(conjunctive)); + + Filter result = CriterionUtils.validateAndConvert(input); + + // Check direct criterion + Criterion convertedDirect = result.getCriteria().get(0); + assertEquals(convertedDirect.getValue(), ""); + assertTrue(convertedDirect.hasValues()); + assertEquals("directValue", convertedDirect.getValues().get(0)); + + // Check OR clause criterion + Criterion convertedOr = result.getOr().get(0).getAnd().get(0); + assertEquals(convertedOr.getValue(), ""); + assertTrue(convertedOr.hasValues()); + assertEquals("orValue", convertedOr.getValues().get(0)); + 
} + + @Test + public void testEmptyStringValueNotConverted() { + Filter input = new Filter(); + Criterion criterion = new Criterion(); + criterion.setValue(""); // Empty string value + input.setCriteria(new CriterionArray(criterion)); + + Filter result = CriterionUtils.validateAndConvert(input); + + Criterion convertedCriterion = result.getCriteria().get(0); + assertEquals(convertedCriterion.getValue(), ""); + assertFalse(convertedCriterion.hasValues()); // Should not be converted since value was empty + } + + @Test + public void testMixedEmptyAndNonEmptyValues() { + Filter input = new Filter(); + + Criterion emptyCriterion = new Criterion(); + emptyCriterion.setValue(""); + + Criterion nonEmptyCriterion = new Criterion(); + nonEmptyCriterion.setValue("value1"); + + input.setCriteria(new CriterionArray(emptyCriterion, nonEmptyCriterion)); + + Filter result = CriterionUtils.validateAndConvert(input); + + assertEquals(2, result.getCriteria().size()); + + // Check empty criterion + Criterion convertedEmpty = result.getCriteria().get(0); + assertEquals(convertedEmpty.getValue(), ""); + assertFalse(convertedEmpty.hasValues()); + + // Check non-empty criterion + Criterion convertedNonEmpty = result.getCriteria().get(1); + assertEquals(convertedNonEmpty.getValue(), ""); + assertTrue(convertedNonEmpty.hasValues()); + assertEquals(convertedNonEmpty.getValues().get(0), "value1"); + } + + @Test + public void testOrClauseWithEmptyValues() { + Filter input = new Filter(); + + // Create OR clause with mixed empty and non-empty criteria + Criterion emptyCriterion = new Criterion(); + emptyCriterion.setValue(""); + + Criterion nonEmptyCriterion = new Criterion(); + nonEmptyCriterion.setValue("orValue"); + + ConjunctiveCriterion conjunctive = new ConjunctiveCriterion(); + conjunctive.setAnd(new CriterionArray(emptyCriterion, nonEmptyCriterion)); + + input.setOr(new ConjunctiveCriterionArray(conjunctive)); + + Filter result = CriterionUtils.validateAndConvert(input); + + // Check 
empty criterion + Criterion convertedEmpty = result.getOr().get(0).getAnd().get(0); + assertEquals(convertedEmpty.getValue(), ""); + assertFalse(convertedEmpty.hasValues()); + + // Check non-empty criterion + Criterion convertedNonEmpty = result.getOr().get(0).getAnd().get(1); + assertEquals(convertedNonEmpty.getValue(), ""); + assertTrue(convertedNonEmpty.hasValues()); + assertEquals(convertedNonEmpty.getValues().get(0), "orValue"); + } + + @Test + public void testCriterionWithOnlyValues() { + Filter input = new Filter(); + Criterion criterion = new Criterion(); + criterion.setValues(new StringArray("value1")); // Only has values, no value field set + input.setCriteria(new CriterionArray(criterion)); + + Filter result = CriterionUtils.validateAndConvert(input); + + Criterion convertedCriterion = result.getCriteria().get(0); + assertEquals(convertedCriterion.getValue(), ""); + assertTrue(convertedCriterion.hasValues()); + assertEquals(convertedCriterion.getValues().get(0), "value1"); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testMultiUrnThrowsException() { + Filter input = new Filter(); + Criterion criterion = new Criterion(); + criterion.setValue( + "urn:li:dataset:(urn:li:dataPlatform:postgres,foo,PROD),urn:li:dataset:(urn:li:dataPlatform:postgres,foo,PROD)"); + input.setCriteria(new CriterionArray(criterion)); + + CriterionUtils.validateAndConvert(input); + } + + @Test + public void testUrnConversion() { + Filter input = new Filter(); + Criterion criterion = new Criterion(); + criterion.setValue("urn:li:dataset:(urn:li:dataPlatform:postgres,foo,PROD)"); + input.setCriteria(new CriterionArray(criterion)); + + Filter result = CriterionUtils.validateAndConvert(input); + + Criterion convertedCriterion = result.getCriteria().get(0); + assertEquals(convertedCriterion.getValue(), ""); + assertTrue(convertedCriterion.hasValues()); + assertEquals( + "urn:li:dataset:(urn:li:dataPlatform:postgres,foo,PROD)", + 
convertedCriterion.getValues().get(0)); + } +}