Skip to content

Commit

Permalink
Merge branch 'master' into uv-dep
Browse files Browse the repository at this point in the history
  • Loading branch information
hsheth2 authored Aug 26, 2024
2 parents 272823a + 99824b4 commit 2eb7e3e
Show file tree
Hide file tree
Showing 262 changed files with 37,603 additions and 5,724 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/build-and-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ jobs:
-x :metadata-ingestion-modules:airflow-plugin:check \
-x :metadata-ingestion-modules:dagster-plugin:build \
-x :metadata-ingestion-modules:dagster-plugin:check \
-x :metadata-ingestion-modules:gx-plugin:build \
-x :metadata-ingestion-modules:gx-plugin:check \
-x :datahub-frontend:build \
-x :datahub-web-react:build \
--parallel
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/docker-unified.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ jobs:
mysql_setup_change: ${{ steps.ci-optimize.outputs.mysql-setup-change == 'true' }}
postgres_setup_change: ${{ steps.ci-optimize.outputs.postgres-setup-change == 'true' }}
elasticsearch_setup_change: ${{ steps.ci-optimize.outputs.elasticsearch-setup-change == 'true' }}
smoke_test_change: ${{ steps.ci-optimize.outputs.smoke-test-change == 'true' }}
steps:
- name: Check out the repo
uses: acryldata/sane-checkout-action@v3
Expand Down Expand Up @@ -813,7 +814,7 @@ jobs:
echo 'matrix=["cypress_suite1","cypress_rest"]' >> $GITHUB_OUTPUT
elif [ '${{ needs.setup.outputs.ingestion_only }}' == 'true' ]; then
echo 'matrix=["no_cypress_suite0","no_cypress_suite1"]' >> $GITHUB_OUTPUT
elif [ '${{ needs.setup.outputs.backend_change }}' == 'true' ]; then
elif [[ '${{ needs.setup.outputs.backend_change }}' == 'true' || '${{ needs.setup.outputs.smoke_test_change }}' == 'true' ]]; then
echo 'matrix=["no_cypress_suite0","no_cypress_suite1","cypress_suite1","cypress_rest"]' >> $GITHUB_OUTPUT
else
echo 'matrix=[]' >> $GITHUB_OUTPUT
Expand Down
87 changes: 87 additions & 0 deletions .github/workflows/gx-plugin.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
name: GX Plugin
on:
push:
branches:
- master
paths:
- ".github/workflows/gx-plugin.yml"
- "metadata-ingestion-modules/gx-plugin/**"
- "metadata-ingestion/**"
- "metadata-models/**"
pull_request:
branches:
- master
paths:
- ".github/**"
- "metadata-ingestion-modules/gx-plugin/**"
- "metadata-ingestion/**"
- "metadata-models/**"
release:
types: [published]

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:
gx-plugin:
runs-on: ubuntu-latest
env:
SPARK_VERSION: 3.0.3
DATAHUB_TELEMETRY_ENABLED: false
strategy:
matrix:
python-version: ["3.8", "3.10"]
include:
- python-version: "3.8"
extraPythonRequirement: "great-expectations~=0.15.12"
- python-version: "3.10"
extraPythonRequirement: "great-expectations~=0.16.0 numpy~=1.26.0"
- python-version: "3.11"
extraPythonRequirement: "great-expectations~=0.17.0"
fail-fast: false
steps:
- name: Set up JDK 17
uses: actions/setup-java@v3
with:
distribution: "zulu"
java-version: 17
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
cache: "pip"
- name: Install dependencies
run: ./metadata-ingestion/scripts/install_deps.sh
- name: Install GX package and test (extras ${{ matrix.extraPythonRequirement }})
run: ./gradlew -Pextra_pip_requirements='${{ matrix.extraPythonRequirement }}' :metadata-ingestion-modules:gx-plugin:lint :metadata-ingestion-modules:gx-plugin:testQuick
- name: pip freeze show list installed
if: always()
run: source metadata-ingestion-modules/gx-plugin/venv/bin/activate && pip freeze
- uses: actions/upload-artifact@v3
if: ${{ always() && matrix.python-version == '3.11' && matrix.extraPythonRequirement == 'great-expectations~=0.17.0' }}
with:
name: Test Results (GX Plugin ${{ matrix.python-version}})
path: |
**/build/reports/tests/test/**
**/build/test-results/test/**
**/junit.*.xml
- name: Upload coverage to Codecov
if: always()
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
directory: .
fail_ci_if_error: false
flags: gx-${{ matrix.python-version }}-${{ matrix.extraPythonRequirement }}
name: pytest-gx
verbose: true

event-file:
runs-on: ubuntu-latest
steps:
- name: Upload
uses: actions/upload-artifact@v3
with:
name: Event File
path: ${{ github.event_path }}
2 changes: 1 addition & 1 deletion .github/workflows/test-results.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: Test Results

on:
workflow_run:
workflows: ["build & test", "metadata ingestion", "Airflow Plugin", "Dagster Plugin"]
workflows: ["build & test", "metadata ingestion", "Airflow Plugin", "Dagster Plugin", "GX Plugin"]
types:
- completed

Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,10 @@ We welcome contributions from the community. Please refer to our [Contributing G

Join our [Slack workspace](https://datahubproject.io/slack?utm_source=github&utm_medium=readme&utm_campaign=github_readme) for discussions and important announcements. You can also find out more about our upcoming [town hall meetings](docs/townhalls.md) and view past recordings.

## Security

See [Security Stance](docs/SECURITY_STANCE.md) for information on DataHub's Security.

## Adoption

Here are the companies that have officially adopted DataHub. Please feel free to add yours to the list if we missed it.
Expand Down Expand Up @@ -138,6 +142,7 @@ Here are the companies that have officially adopted DataHub. Please feel free to
- [Peloton](https://www.onepeloton.com)
- [PITS Global Data Recovery Services](https://www.pitsdatarecovery.net/)
- [Razer](https://www.razer.com)
- [Rippling](https://www.rippling.com/)
- [Showroomprive](https://www.showroomprive.com/)
- [SpotHero](https://spothero.com)
- [Stash](https://www.stash.com)
Expand All @@ -153,6 +158,7 @@ Here are the companies that have officially adopted DataHub. Please feel free to
- [Zynga](https://www.zynga.com)



## Select Articles & Talks

- [DataHub Blog](https://blog.datahubproject.io/)
Expand Down
6 changes: 3 additions & 3 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ buildscript {
// Releases: https://github.com/linkedin/rest.li/blob/master/CHANGELOG.md
ext.pegasusVersion = '29.57.0'
ext.mavenVersion = '3.6.3'
ext.springVersion = '6.1.5'
ext.springVersion = '6.1.6'
ext.springBootVersion = '3.2.6'
ext.springKafkaVersion = '3.1.6'
ext.openTelemetryVersion = '1.18.0'
Expand All @@ -49,7 +49,7 @@ buildscript {
ext.log4jVersion = '2.23.1'
ext.slf4jVersion = '1.7.36'
ext.logbackClassic = '1.4.14'
ext.hadoop3Version = '3.3.5'
ext.hadoop3Version = '3.3.6'
ext.kafkaVersion = '5.5.15'
ext.hazelcastVersion = '5.3.6'
ext.ebeanVersion = '12.16.1'
Expand Down Expand Up @@ -134,7 +134,7 @@ project.ext.externalDependency = [
'elasticSearchRest': 'org.opensearch.client:opensearch-rest-high-level-client:' + elasticsearchVersion,
'elasticSearchJava': 'org.opensearch.client:opensearch-java:2.6.0',
'findbugsAnnotations': 'com.google.code.findbugs:annotations:3.0.1',
'graphqlJava': 'com.graphql-java:graphql-java:21.3',
'graphqlJava': 'com.graphql-java:graphql-java:21.5',
'graphqlJavaScalars': 'com.graphql-java:graphql-java-extended-scalars:21.0',
'gson': 'com.google.code.gson:gson:2.8.9',
'guice': 'com.google.inject:guice:7.0.0',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2728,19 +2728,23 @@ private void configureFormResolvers(final RuntimeWiring.Builder builder) {
corpUserType,
(env) -> {
final FormActorAssignment actors = env.getSource();
return actors.getUsers().stream()
.map(CorpUser::getUrn)
.collect(Collectors.toList());
return actors.getUsers() != null
? actors.getUsers().stream()
.map(CorpUser::getUrn)
.collect(Collectors.toList())
: null;
}))
.dataFetcher(
"groups",
new LoadableTypeBatchResolver<>(
corpGroupType,
(env) -> {
final FormActorAssignment actors = env.getSource();
return actors.getGroups().stream()
.map(CorpGroup::getUrn)
.collect(Collectors.toList());
return actors.getGroups() != null
? actors.getGroups().stream()
.map(CorpGroup::getUrn)
.collect(Collectors.toList())
: null;
}))
.dataFetcher("isAssignedToMe", new IsFormAssignedToMeResolver(groupService)));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ public static FormActorAssignment mapFormActorAssignment(
if (input.getGroups() != null) {
UrnArray groupUrns = new UrnArray();
input.getGroups().forEach(group -> groupUrns.add(UrnUtils.getUrn(group)));
result.setUsers(groupUrns);
result.setGroups(groupUrns);
}

return result;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ public static boolean isOwnerEqual(
if (!owner.getOwner().equals(ownerUrn)) {
return false;
}
if (owner.getTypeUrn() != null) {
if (owner.getTypeUrn() != null && ownershipTypeUrn != null) {
return owner.getTypeUrn().equals(ownershipTypeUrn);
}
if (ownershipTypeUrn == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ public CompletableFuture<DocPropagationSettings> get(final DataFetchingEnvironme
final GlobalSettingsInfo globalSettings =
_settingsService.getGlobalSettings(context.getOperationContext());
final DocPropagationSettings defaultSettings = new DocPropagationSettings();
defaultSettings.setDocColumnPropagation(true);
// TODO: Enable by default. Currently the automation trusts the settings aspect, which
// does not have this.
defaultSettings.setDocColumnPropagation(false);
return globalSettings != null && globalSettings.hasDocPropagation()
? mapDocPropagationSettings(globalSettings.getDocPropagation())
: defaultSettings;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ public void testIsOwnerEqualOnlyOwnershipTypeUrn() throws URISyntaxException {
Urn technicalOwnershipTypeUrn = new Urn(TECHNICAL_OWNER_OWNERSHIP_TYPE_URN);
Urn businessOwnershipTypeUrn = new Urn(BUSINESS_OWNER_OWNERSHIP_TYPE_URN);
Urn ownerUrn1 = new Urn("urn:li:corpuser:foo");
Urn ownerUrn2 = new Urn("urn:li:corpuser:bar");

Owner ownerWithTechnicalOwnership = new Owner();
ownerWithTechnicalOwnership.setOwner(ownerUrn1);
Expand All @@ -72,12 +73,17 @@ public void testIsOwnerEqualOnlyOwnershipTypeUrn() throws URISyntaxException {
ownerWithoutOwnershipType.setOwner(ownerUrn1);
ownerWithoutOwnershipType.setType(OwnershipType.NONE);

Owner owner2WithoutOwnershipType = new Owner();
owner2WithoutOwnershipType.setOwner(ownerUrn2);
owner2WithoutOwnershipType.setType(OwnershipType.NONE);

assertTrue(
OwnerUtils.isOwnerEqual(ownerWithTechnicalOwnership, ownerUrn1, technicalOwnershipTypeUrn));
assertFalse(
OwnerUtils.isOwnerEqual(ownerWithBusinessOwnership, ownerUrn1, technicalOwnershipTypeUrn));
assertFalse(OwnerUtils.isOwnerEqual(ownerWithTechnicalOwnership, ownerUrn1, null));
assertTrue(OwnerUtils.isOwnerEqual(ownerWithTechnicalOwnership, ownerUrn1, null));
assertTrue(OwnerUtils.isOwnerEqual(ownerWithoutOwnershipType, ownerUrn1, null));
assertFalse(OwnerUtils.isOwnerEqual(owner2WithoutOwnershipType, ownerUrn1, null));
}

public void testIsOwnerEqualWithBothLegacyAndNewType() throws URISyntaxException {
Expand Down
5 changes: 4 additions & 1 deletion datahub-upgrade/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,16 @@ dependencies {
implementation('io.airlift:aircompressor:0.27') {
because("CVE-2024-36114")
}
implementation('dnsjava:dnsjava:3.6.1') {
because("CVE-2024-25638")
}
}


// mock internal schema registry
implementation externalDependency.kafkaAvroSerde
implementation externalDependency.kafkaAvroSerializer
implementation "org.apache.kafka:kafka_2.12:3.7.0"
implementation "org.apache.kafka:kafka_2.12:3.7.1"

implementation externalDependency.slf4jApi
compileOnly externalDependency.lombok
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package com.linkedin.datahub.upgrade.config;

import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade;
import com.linkedin.datahub.upgrade.system.domaindescription.ReindexDomainDescription;
import com.linkedin.metadata.entity.AspectDao;
import com.linkedin.metadata.entity.EntityService;
import io.datahubproject.metadata.context.OperationContext;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Conditional;
import org.springframework.context.annotation.Configuration;

@Configuration
@Conditional(SystemUpdateCondition.NonBlockingSystemUpdateCondition.class)
public class ReindexDomainDescriptionConfig {

@Bean
public NonBlockingSystemUpgrade reindexDomainDescription(
final OperationContext opContext,
final EntityService<?> entityService,
final AspectDao aspectDao,
@Value("${systemUpdate.domainDescription.enabled}") final boolean enabled,
@Value("${systemUpdate.domainDescription.batchSize}") final Integer batchSize,
@Value("${systemUpdate.domainDescription.delayMs}") final Integer delayMs,
@Value("${systemUpdate.domainDescription.limit}") final Integer limit) {
return new ReindexDomainDescription(
opContext, entityService, aspectDao, enabled, batchSize, delayMs, limit);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package com.linkedin.datahub.upgrade.system.domaindescription;

import com.google.common.collect.ImmutableList;
import com.linkedin.datahub.upgrade.UpgradeStep;
import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade;
import com.linkedin.metadata.entity.AspectDao;
import com.linkedin.metadata.entity.EntityService;
import io.datahubproject.metadata.context.OperationContext;
import java.util.List;
import javax.annotation.Nonnull;
import lombok.extern.slf4j.Slf4j;

/**
* A job that reindexes all domain aspects as part of reindexing descriptions This is required to
* fix the analytics for domains
*/
@Slf4j
public class ReindexDomainDescription implements NonBlockingSystemUpgrade {

private final List<UpgradeStep> _steps;

public ReindexDomainDescription(
@Nonnull OperationContext opContext,
EntityService<?> entityService,
AspectDao aspectDao,
boolean enabled,
Integer batchSize,
Integer batchDelayMs,
Integer limit) {
if (enabled) {
_steps =
ImmutableList.of(
new ReindexDomainDescriptionStep(
opContext, entityService, aspectDao, batchSize, batchDelayMs, limit));
} else {
_steps = ImmutableList.of();
}
}

@Override
public String id() {
return this.getClass().getName();
}

@Override
public List<UpgradeStep> steps() {
return _steps;
}
}
Loading

0 comments on commit 2eb7e3e

Please sign in to comment.