Merge branch 'master' into iceberg_performance
skrydal authored Aug 21, 2024
2 parents a7c842f + 6b3c06a commit 0320a23
Showing 137 changed files with 4,422 additions and 1,279 deletions.
3 changes: 1 addition & 2 deletions .github/actions/docker-custom-build-and-push/action.yml
@@ -56,8 +56,7 @@ runs:
 suffix=${{ inputs.flavor && format('-{0}', inputs.flavor) || '' }}
 tags: |
 type=raw,value=${{ inputs.image_tag }}
-type=raw,value=head,enable=${{ github.ref == format('refs/heads/{0}', 'acryl-main') }}
-type=ref,event=pr,prefix=pr
+type=raw,value=head,enable={{is_default_branch}}
 type=sha,prefix=,format=short
 # Code for testing the build when not pushing to Docker Hub.
4 changes: 2 additions & 2 deletions .github/scripts/docker_helpers.sh
@@ -16,11 +16,11 @@ function get_tag {
 }

 function get_tag_slim {
-  echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-slim,g" -e 's,refs/tags/\(.*\),\1-slim,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-slim,g'),${SHORT_SHA}-slim
+  echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-slim,g" -e 's,refs/tags/\(.*\),\1-slim,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-slim,g')
 }

 function get_tag_full {
-  echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-full,g" -e 's,refs/tags/\(.*\),\1-full,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g'),${SHORT_SHA}-full
+  echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-full,g" -e 's,refs/tags/\(.*\),\1-full,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g')
 }

 function get_python_docker_release_v {
8 changes: 8 additions & 0 deletions .github/scripts/docker_logs.sh
@@ -0,0 +1,8 @@
+TARGET_DIR="${TARGET_DIR:=docker_logs}"
+TEST_STRATEGY="${TEST_STRATEGY:=}"
+
+mkdir -p "$TARGET_DIR"
+for name in `docker ps -a --format '{{.Names}}'`;
+do
+    docker logs "$name" >& "${TARGET_DIR}/${name}${TEST_STRATEGY}.log" || true
+done
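When the workflow sources this script (see the `docker-unified.yml` change below, which sets `TEST_STRATEGY="-${{ matrix.test_strategy }}"` before `source .github/scripts/docker_logs.sh`), each container's output lands in `docker_logs/<container-name><suffix>.log`, matching the updated artifact path `docker_logs/*.log`.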
32 changes: 15 additions & 17 deletions .github/workflows/docker-unified.yml
@@ -60,6 +60,7 @@ jobs:
 mysql_setup_change: ${{ steps.ci-optimize.outputs.mysql-setup-change == 'true' }}
 postgres_setup_change: ${{ steps.ci-optimize.outputs.postgres-setup-change == 'true' }}
 elasticsearch_setup_change: ${{ steps.ci-optimize.outputs.elasticsearch-setup-change == 'true' }}
+smoke_test_change: ${{ steps.ci-optimize.outputs.smoke-test-change == 'true' }}
 steps:
 - name: Check out the repo
 uses: acryldata/sane-checkout-action@v3
@@ -501,7 +502,7 @@ jobs:
 name: Build and Push DataHub Elasticsearch Setup Docker Image
 runs-on: ubuntu-latest
 needs: setup
-if: ${{ needs.setup.outputs.elasticsearch_setup_change == 'true' || (needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true') }}
+if: ${{ needs.setup.outputs.elasticsearch_setup_change == 'true' || (needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' ) }}
 steps:
 - name: Check out the repo
 uses: acryldata/sane-checkout-action@v3
@@ -813,7 +814,7 @@ jobs:
 echo 'matrix=["cypress_suite1","cypress_rest"]' >> $GITHUB_OUTPUT
 elif [ '${{ needs.setup.outputs.ingestion_only }}' == 'true' ]; then
 echo 'matrix=["no_cypress_suite0","no_cypress_suite1"]' >> $GITHUB_OUTPUT
-elif [ '${{ needs.setup.outputs.backend_change }}' == 'true' ]; then
+elif [[ '${{ needs.setup.outputs.backend_change }}' == 'true' || '${{ needs.setup.outputs.smoke_test_change }}' == 'true' ]]; then
 echo 'matrix=["no_cypress_suite0","no_cypress_suite1","cypress_suite1","cypress_rest"]' >> $GITHUB_OUTPUT
 else
 echo 'matrix=[]' >> $GITHUB_OUTPUT
@@ -861,11 +862,6 @@ jobs:
 with:
 python-version: "3.10"
 cache: "pip"
-- name: Install dependencies
-run: ./metadata-ingestion/scripts/install_deps.sh
-- name: Build datahub cli
-run: |
-./gradlew :metadata-ingestion:install
 - name: Login to DockerHub
 uses: docker/login-action@v3
 if: ${{ needs.setup.outputs.docker-login == 'true' }}
@@ -993,6 +989,15 @@ jobs:
 }
 }
 }'
+- name: Disk Check
+run: df -h . && docker images
+- name: Install dependencies
+run: ./metadata-ingestion/scripts/install_deps.sh
+- name: Build datahub cli
+run: |
+./gradlew :metadata-ingestion:install
+- name: Disk Check
+run: df -h . && docker images
 - name: Remove Source Code
 run: find ./*/* ! -path "./metadata-ingestion*" ! -path "./smoke-test*" ! -path "./gradle*" -delete
 - name: Disk Check
@@ -1013,21 +1018,14 @@
 if: failure()
 run: |
 docker ps -a
-docker logs datahub-datahub-gms-1 >& gms-${{ matrix.test_strategy }}.log || true
-docker logs datahub-datahub-actions-1 >& actions-${{ matrix.test_strategy }}.log || true
-docker logs datahub-datahub-mae-consumer-1 >& mae-${{ matrix.test_strategy }}.log || true
-docker logs datahub-datahub-mce-consumer-1 >& mce-${{ matrix.test_strategy }}.log || true
-docker logs datahub-broker-1 >& broker-${{ matrix.test_strategy }}.log || true
-docker logs datahub-mysql-1 >& mysql-${{ matrix.test_strategy }}.log || true
-docker logs datahub-elasticsearch-1 >& elasticsearch-${{ matrix.test_strategy }}.log || true
-docker logs datahub-datahub-frontend-react-1 >& frontend-${{ matrix.test_strategy }}.log || true
-docker logs datahub-upgrade-1 >& upgrade-${{ matrix.test_strategy }}.log || true
+TEST_STRATEGY="-${{ matrix.test_strategy }}"
+source .github/scripts/docker_logs.sh
 - name: Upload logs
 uses: actions/upload-artifact@v3
 if: failure()
 with:
 name: docker logs
-path: "*.log"
+path: "docker_logs/*.log"
 - name: Upload screenshots
 uses: actions/upload-artifact@v3
 if: failure()
19 changes: 19 additions & 0 deletions README.md
@@ -138,6 +138,7 @@ Here are the companies that have officially adopted DataHub. Please feel free to
 - [Peloton](https://www.onepeloton.com)
 - [PITS Global Data Recovery Services](https://www.pitsdatarecovery.net/)
 - [Razer](https://www.razer.com)
+- [Rippling](https://www.rippling.com/)
 - [Showroomprive](https://www.showroomprive.com/)
 - [SpotHero](https://spothero.com)
 - [Stash](https://www.stash.com)
@@ -153,6 +154,7 @@ Here are the companies that have officially adopted DataHub. Please feel free to
 - [Zynga](https://www.zynga.com)


+
 ## Select Articles & Talks

 - [DataHub Blog](https://blog.datahubproject.io/)
@@ -173,6 +175,23 @@ Here are the companies that have officially adopted DataHub. Please feel free to

 See the full list [here](docs/links.md).

+## Security Notes
+
+### Multi-Component
+
+The DataHub project uses a wide range of code that is responsible for build automation and documentation generation, and
+includes both service (i.e. GMS) and client (i.e. ingestion) components. When evaluating a security vulnerability in an
+upstream dependency, it is important to consider which component uses the dependency and how it is used in the project.
+For example, an upstream JavaScript library may carry a Denial of Service (DoS) vulnerability; however, when it is used
+only for generating documentation, it does not affect the running DataHub service and cannot be used to impact it.
+Similarly, Python dependencies for ingestion are part of the DataHub client and are not exposed as a service.
+
+### Known False Positives
+
+DataHub's ingestion client does not include credentials in the code repository, Python package, or Docker images.
+Upstream Python dependencies may include files that look like credentials, and automated scanners often misinterpret
+them as real credentials.
+
 ## License

 [Apache License 2.0](./LICENSE).
@@ -2728,19 +2728,23 @@ private void configureFormResolvers(final RuntimeWiring.Builder builder) {
 corpUserType,
 (env) -> {
 final FormActorAssignment actors = env.getSource();
-return actors.getUsers().stream()
-.map(CorpUser::getUrn)
-.collect(Collectors.toList());
+return actors.getUsers() != null
+? actors.getUsers().stream()
+.map(CorpUser::getUrn)
+.collect(Collectors.toList())
+: null;
 }))
 .dataFetcher(
 "groups",
 new LoadableTypeBatchResolver<>(
 corpGroupType,
 (env) -> {
 final FormActorAssignment actors = env.getSource();
-return actors.getGroups().stream()
-.map(CorpGroup::getUrn)
-.collect(Collectors.toList());
+return actors.getGroups() != null
+? actors.getGroups().stream()
+.map(CorpGroup::getUrn)
+.collect(Collectors.toList())
+: null;
 }))
 .dataFetcher("isAssignedToMe", new IsFormAssignedToMeResolver(groupService)));
 }
@@ -202,7 +202,7 @@ public static FormActorAssignment mapFormActorAssignment(
 if (input.getGroups() != null) {
 UrnArray groupUrns = new UrnArray();
 input.getGroups().forEach(group -> groupUrns.add(UrnUtils.getUrn(group)));
-result.setUsers(groupUrns);
+result.setGroups(groupUrns);
 }

 return result;
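The one-character fix above (`setUsers` → `setGroups`) means group URNs were previously written into the users field of the record. A regression-test sketch for the corrected mapping follows; the `FormActorAssignmentInput` type name, the single-argument signature, and the TestNG scaffolding are assumptions for illustration, not code from this commit:

import static org.testng.Assert.assertEquals;

import com.google.common.collect.ImmutableList;
import org.testng.annotations.Test;

public class FormActorAssignmentMappingTest {

  @Test
  public void testGroupUrnsLandInGroupsField() {
    // Hypothetical input type; the diff only shows that the input exposes getGroups()
    // yielding group URN strings.
    FormActorAssignmentInput input = new FormActorAssignmentInput();
    input.setGroups(ImmutableList.of("urn:li:corpGroup:data-platform"));

    FormActorAssignment result = FormUtils.mapFormActorAssignment(input);

    // Before the fix the mapped URNs surfaced under getUsers() instead.
    assertEquals(result.getGroups().get(0).toString(), "urn:li:corpGroup:data-platform");
  }
}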
@@ -171,7 +171,7 @@ public static boolean isOwnerEqual(
 if (!owner.getOwner().equals(ownerUrn)) {
 return false;
 }
-if (owner.getTypeUrn() != null) {
+if (owner.getTypeUrn() != null && ownershipTypeUrn != null) {
 return owner.getTypeUrn().equals(ownershipTypeUrn);
 }
 if (ownershipTypeUrn == null) {
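Because the hunk cuts off mid-method, here is the matching rule it implies when read together with the updated test below — a sketch assembled from the visible lines and the new assertions, not the verbatim file:

public static boolean isOwnerEqual(
    final Owner owner, final Urn ownerUrn, final Urn ownershipTypeUrn) {
  if (!owner.getOwner().equals(ownerUrn)) {
    return false; // different owner URN: never a match
  }
  // Fix: compare ownership types only when both sides actually carry one. Previously a
  // non-null owner type was compared against a null filter and always failed, which is
  // why the test change below flips assertFalse to assertTrue for a null filter type.
  if (owner.getTypeUrn() != null && ownershipTypeUrn != null) {
    return owner.getTypeUrn().equals(ownershipTypeUrn);
  }
  if (ownershipTypeUrn == null) {
    return true; // no type filter: a matching owner URN is enough (per the tests)
  }
  // The remainder (e.g. legacy type handling, exercised by
  // testIsOwnerEqualWithBothLegacyAndNewType) is outside the visible hunk;
  // false is assumed here.
  return false;
}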
@@ -33,7 +33,9 @@ public CompletableFuture<DocPropagationSettings> get(final DataFetchingEnvironme
 final GlobalSettingsInfo globalSettings =
 _settingsService.getGlobalSettings(context.getOperationContext());
 final DocPropagationSettings defaultSettings = new DocPropagationSettings();
-defaultSettings.setDocColumnPropagation(true);
+// TODO: Enable by default. Currently the automation trusts the settings aspect, which
+// does not have this.
+defaultSettings.setDocColumnPropagation(false);
 return globalSettings != null && globalSettings.hasDocPropagation()
 ? mapDocPropagationSettings(globalSettings.getDocPropagation())
 : defaultSettings;
@@ -59,6 +59,7 @@ public void testIsOwnerEqualOnlyOwnershipTypeUrn() throws URISyntaxException {
 Urn technicalOwnershipTypeUrn = new Urn(TECHNICAL_OWNER_OWNERSHIP_TYPE_URN);
 Urn businessOwnershipTypeUrn = new Urn(BUSINESS_OWNER_OWNERSHIP_TYPE_URN);
 Urn ownerUrn1 = new Urn("urn:li:corpuser:foo");
+Urn ownerUrn2 = new Urn("urn:li:corpuser:bar");

 Owner ownerWithTechnicalOwnership = new Owner();
 ownerWithTechnicalOwnership.setOwner(ownerUrn1);
@@ -72,12 +73,17 @@ public void testIsOwnerEqualOnlyOwnershipTypeUrn() throws URISyntaxException {
 ownerWithoutOwnershipType.setOwner(ownerUrn1);
 ownerWithoutOwnershipType.setType(OwnershipType.NONE);

+Owner owner2WithoutOwnershipType = new Owner();
+owner2WithoutOwnershipType.setOwner(ownerUrn2);
+owner2WithoutOwnershipType.setType(OwnershipType.NONE);
+
 assertTrue(
 OwnerUtils.isOwnerEqual(ownerWithTechnicalOwnership, ownerUrn1, technicalOwnershipTypeUrn));
 assertFalse(
 OwnerUtils.isOwnerEqual(ownerWithBusinessOwnership, ownerUrn1, technicalOwnershipTypeUrn));
-assertFalse(OwnerUtils.isOwnerEqual(ownerWithTechnicalOwnership, ownerUrn1, null));
+assertTrue(OwnerUtils.isOwnerEqual(ownerWithTechnicalOwnership, ownerUrn1, null));
 assertTrue(OwnerUtils.isOwnerEqual(ownerWithoutOwnershipType, ownerUrn1, null));
+assertFalse(OwnerUtils.isOwnerEqual(owner2WithoutOwnershipType, ownerUrn1, null));
 }

 public void testIsOwnerEqualWithBothLegacyAndNewType() throws URISyntaxException {
2 changes: 1 addition & 1 deletion datahub-upgrade/build.gradle
@@ -55,7 +55,7 @@ dependencies {
 // mock internal schema registry
 implementation externalDependency.kafkaAvroSerde
 implementation externalDependency.kafkaAvroSerializer
-implementation "org.apache.kafka:kafka_2.12:3.7.0"
+implementation "org.apache.kafka:kafka_2.12:3.7.1"

 implementation externalDependency.slf4jApi
 compileOnly externalDependency.lombok
2 changes: 1 addition & 1 deletion datahub-web-react/.eslintrc.js
@@ -48,7 +48,7 @@ module.exports = {
 ],
 'vitest/prefer-to-be': 'off',
 '@typescript-eslint/no-use-before-define': ['error', { functions: false, classes: false }],
-'react-refresh/only-export-components': ['warn', { 'allowConstantExport': true }],
+'react-refresh/only-export-components': ['warn', { allowConstantExport: true }],
 },
 settings: {
 react: {