Skip to content

Commit

Permalink
Merge branch 'datahub-project:master' into upstream-master
Browse files Browse the repository at this point in the history
  • Loading branch information
bda618 authored Dec 13, 2024
2 parents 33a05ac + d5e0513 commit 0c7a412
Show file tree
Hide file tree
Showing 6,930 changed files with 1,272,949 additions and 254,177 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
9 changes: 7 additions & 2 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
**/node_modules/
datahub-frontend/build/
metadata-ingestion/venv/
*/build/
*/*/build/
**/venv/
**/.tox/
**/.mypy_cache/
**/.pytest_cache/
**/__pycache__/
out
**/*.class
# Have to copy gradle/wrapper/gradle-wrapper.jar, can't exclude ALL jars
Expand Down
90 changes: 90 additions & 0 deletions .github/actions/ci-optimization/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Composite action that classifies a change set by which areas of the
# repository it touches, so calling workflows can skip unrelated jobs.
name: "Identify CI Optimizations"
description: "Determine if code changes are specific to certain modules."

outputs:
  # The "*-only" outputs are true when exactly one of the frontend /
  # ingestion / backend filters matched. Some paths belong to more than one
  # filter (metadata-models/** is in both ingestion and backend, and
  # docker/** is in backend while docker/datahub-frontend/** is in frontend),
  # so a change under those paths sets none of the "*-only" outputs.
  frontend-only:
    description: "Frontend only change"
    value: ${{ steps.filter.outputs.frontend == 'true' && steps.filter.outputs.ingestion == 'false' && steps.filter.outputs.backend == 'false' }}
  ingestion-only:
    description: "Ingestion only change"
    value: ${{ steps.filter.outputs.frontend == 'false' && steps.filter.outputs.ingestion == 'true' && steps.filter.outputs.backend == 'false' }}
  backend-only:
    description: "Backend only change"
    value: ${{ steps.filter.outputs.frontend == 'false' && steps.filter.outputs.ingestion == 'false' && steps.filter.outputs.backend == 'true' }}
  # The "*-change" outputs mirror a single filter each.
  backend-change:
    description: "Backend code has changed"
    value: ${{ steps.filter.outputs.backend == 'true' }}
  ingestion-change:
    description: "Ingestion code has changed"
    value: ${{ steps.filter.outputs.ingestion == 'true' }}
  ingestion-base-change:
    description: "Ingestion base image docker image has changed"
    value: ${{ steps.filter.outputs.ingestion-base == 'true' }}
  frontend-change:
    description: "Frontend code has changed"
    value: ${{ steps.filter.outputs.frontend == 'true' }}
  docker-change:
    description: "Docker code has changed"
    value: ${{ steps.filter.outputs.docker == 'true' }}
  kafka-setup-change:
    description: "Kafka setup docker change"
    value: ${{ steps.filter.outputs.kafka-setup == 'true' }}
  mysql-setup-change:
    description: "Mysql setup docker change"
    value: ${{ steps.filter.outputs.mysql-setup == 'true' }}
  postgres-setup-change:
    description: "Postgres setup docker change"
    value: ${{ steps.filter.outputs.postgres-setup == 'true' }}
  elasticsearch-setup-change:
    description: "Elasticsearch setup docker change"
    value: ${{ steps.filter.outputs.elasticsearch-setup == 'true' }}
  smoke-test-change:
    description: "Smoke test change"
    value: ${{ steps.filter.outputs.smoke-test == 'true' }}

runs:
  using: "composite"
  steps:
    - uses: dorny/paths-filter@v3
      id: filter
      with:
        # Empty token forces it to use raw git commands.
        token: ""
        # Each key below becomes a `steps.filter.outputs.<key>` value that the
        # outputs above compare against 'true' / 'false'.
        filters: |
          frontend:
            - "datahub-frontend/**"
            - "datahub-web-react/**"
            - "docker/datahub-frontend/**"
          ingestion:
            - "metadata-ingestion-modules/**"
            - "metadata-ingestion/**"
            - "metadata-models/**"
            - "docker/datahub-ingestion-base/**"
            - "docker/datahub-ingestion/**"
          ingestion-base:
            - "docker/datahub-ingestion-base/**"
          docker:
            - "docker/**"
          backend:
            - "metadata-models/**"
            - "datahub-upgrade/**"
            - "entity-registry/**"
            - "li-utils/**"
            - "metadata-auth/**"
            - "metadata-dao-impl/**"
            - "metadata-events/**"
            - "metadata-io/**"
            - "metadata-jobs/**"
            - "metadata-service/**"
            - "metadata-utils/**"
            - "metadata-operation-context/**"
            - "datahub-graphql-core/**"
            - "docker/**"
          kafka-setup:
            - "docker/kafka-setup/**"
          mysql-setup:
            - "docker/mysql-setup/**"
          postgres-setup:
            - "docker/postgres-setup/**"
          elasticsearch-setup:
            - "docker/elasticsearch-setup/**"
          smoke-test:
            - "smoke-test/**"
103 changes: 82 additions & 21 deletions .github/actions/docker-custom-build-and-push/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,39 +20,65 @@ inputs:
required: false

images:
# e.g. linkedin/datahub-gms
# e.g. acryldata/datahub-gms
description: "List of Docker images to use as base name for tags"
required: true
build-args:
description: "List of build-time variables. Same as docker/build-push-action"
required: false
tags:
# e.g. latest,head,sha12345
description: "List of tags to use for the Docker image"
image_tag:
# e.g. pr12345 OR head OR v0.1.2.3
description: "Main tag to use for the Docker image"
required: true
flavor:
description: "Image flavor (e.g., slim, full)"
required: false
target:
description: "Sets the target stage to build"
required: false
depot-project:
# Setting this will use native arm64 docker builds instead of QEMU emulation.
# This speeds up builds by 2-3x.
description: "Depot project id"
required: false

outputs:
image_tag:
description: "Docker image tags"
value: ${{ steps.docker_meta.outputs.tags }}
# image_name: ${{ env.DATAHUB_GMS_IMAGE }}

runs:
using: "composite"

steps:
- name: Docker meta
id: docker_meta
uses: crazy-max/ghaction-docker-meta@v1
uses: docker/metadata-action@v5
with:
# list of Docker images to use as base name for tags
images: ${{ inputs.images }}
# add git short SHA as Docker tag
tag-custom: ${{ inputs.tags }}
tag-custom-only: true

flavor: |
latest=false
tags: |
type=raw,value=${{ inputs.image_tag }}
type=raw,value=head,suffix=${{ inputs.flavor && format('-{0}', inputs.flavor) || '' }},enable={{is_default_branch}}
type=sha,prefix=,format=short,suffix=${{ inputs.flavor && format('-{0}', inputs.flavor) || '' }}
- name: Single Tag
id: single_tag
shell: bash
run: |
IMAGES="""
${{ inputs.images }}
"""
TAGS="""
${{ inputs.image_tag }}
"""
echo "SINGLE_IMAGE=$(echo $IMAGES | tr '\n' ' ' | awk -F' |,' '{ print $1 }')" >> "$GITHUB_OUTPUT"
echo "SINGLE_TAG=$(echo $IMAGES | tr '\n' ' ' | awk -F' |,' '{ print $1 }'):$(echo $TAGS | tr '\n' ' ' | awk -F' |,' '{ print $1 }')" >> "$GITHUB_OUTPUT"
# Code for testing the build when not pushing to Docker Hub.
- name: Build and Load image for testing (if not publishing)
uses: docker/build-push-action@v3
uses: docker/build-push-action@v6
if: ${{ inputs.publish != 'true' }}
with:
context: ${{ inputs.context }}
Expand All @@ -62,36 +88,71 @@ runs:
platforms: linux/amd64
build-args: ${{ inputs.build-args }}
tags: ${{ steps.docker_meta.outputs.tags }}
target: ${{ inputs.target }}
load: true
push: false
cache-from: |
type=registry,ref=${{ steps.single_tag.outputs.SINGLE_IMAGE }}:head${{ inputs.flavor && format('-{0}', inputs.flavor) || '' }}
type=registry,ref=${{ steps.docker_meta.outputs.tags }}
cache-to: |
type=inline
- name: Upload image locally for testing (if not publishing)
uses: ishworkh/docker-image-artifact-upload@v1
if: ${{ inputs.publish != 'true' }}
with:
image: ${{ steps.docker_meta.outputs.tags }}
image: ${{ steps.single_tag.outputs.SINGLE_TAG }}

# Code for building multi-platform images and pushing to Docker Hub.
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
if: ${{ inputs.publish == 'true' }}
uses: docker/setup-qemu-action@v3
if: ${{ inputs.publish == 'true' && inputs.depot-project == '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
if: ${{ inputs.publish == 'true' }}
uses: docker/setup-buildx-action@v3
if: ${{ inputs.publish == 'true' && inputs.depot-project == '' }}
- name: Setup Depot CLI
uses: depot/setup-action@v1
if: ${{ inputs.publish == 'true' && inputs.depot-project != '' }}
- name: Login to DockerHub
uses: docker/login-action@v2
uses: docker/login-action@v3
if: ${{ inputs.publish == 'true' }}
with:
username: ${{ inputs.username }}
password: ${{ inputs.password }}

# Depot variant.
- name: Build and Push Multi-Platform image
uses: docker/build-push-action@v3
if: ${{ inputs.publish == 'true' }}
uses: depot/build-push-action@v1
if: ${{ inputs.publish == 'true' && inputs.depot-project != '' }}
with:
project: ${{ inputs.depot-project }}
context: ${{ inputs.context }}
file: ${{ inputs.file }}
platforms: ${{ inputs.platforms }}
build-args: ${{ inputs.build-args }}
tags: ${{ steps.docker_meta.outputs.tags }}
target: ${{ inputs.target }}
push: true
cache-from: |
type=registry,ref=${{ steps.single_tag.outputs.SINGLE_IMAGE }}:head${{ inputs.flavor && format('-{0}', inputs.flavor) || '' }}
type=registry,ref=${{ steps.docker_meta.outputs.tags }}
cache-to: |
type=inline
- name: Build and Push Multi-Platform image
uses: docker/build-push-action@v6
if: ${{ inputs.publish == 'true' && inputs.depot-project == '' }}
with:
context: ${{ inputs.context }}
file: ${{ inputs.file }}
platforms: ${{ inputs.platforms }}
build-args: ${{ inputs.build-args }}
tags: ${{ steps.docker_meta.outputs.tags }}
target: ${{ inputs.target }}
push: true
cache-from: |
type=registry,ref=${{ steps.single_tag.outputs.SINGLE_IMAGE }}:head${{ inputs.flavor && format('-{0}', inputs.flavor) || '' }}
type=registry,ref=${{ steps.docker_meta.outputs.tags }}
cache-to: |
type=inline
# TODO add code for vuln scanning?
28 changes: 20 additions & 8 deletions .github/scripts/check_policies.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
elif urn == "urn:li:dataHubPolicy:editor-platform-policy":
editor_platform_policy_privileges = policy["info"]["privileges"]
elif urn == "urn:li:dataHubPolicy:7":
all_user_platform_policy_privilges = policy["info"]["privileges"]
all_user_platform_policy_privileges = policy["info"]["privileges"]
try:
doc_type = policy["info"]["type"]
privileges = policy["info"]["privileges"]
Expand Down Expand Up @@ -54,10 +54,22 @@
)
assert len(diff_policies) == 0, f"Missing privileges for root user are {diff_policies}"

diff_policies = set(editor_platform_policy_privileges).difference(
set(all_user_platform_policy_privilges)
)
assert "MANAGE_POLICIES" not in all_user_platform_policy_privilges
assert (
len(diff_policies) == 0
), f"Missing privileges for all user policies are {diff_policies}"
# All users privileges checks
assert "MANAGE_POLICIES" not in all_user_platform_policy_privileges
assert "MANAGE_USERS_AND_GROUPS" not in all_user_platform_policy_privileges
assert "MANAGE_SECRETS" not in all_user_platform_policy_privileges
assert "MANAGE_USER_CREDENTIALS" not in all_user_platform_policy_privileges
assert "MANAGE_ACCESS_TOKENS" not in all_user_platform_policy_privileges
assert "EDIT_ENTITY" not in all_user_platform_policy_privileges
assert "DELETE_ENTITY" not in all_user_platform_policy_privileges

# Editor checks
assert "MANAGE_POLICIES" not in editor_platform_policy_privileges
assert "MANAGE_USERS_AND_GROUPS" not in editor_platform_policy_privileges
assert "MANAGE_SECRETS" not in editor_platform_policy_privileges
assert "MANAGE_USER_CREDENTIALS" not in editor_platform_policy_privileges
assert "MANAGE_ACCESS_TOKENS" not in editor_platform_policy_privileges
# These don't prevent a user from modifying entities they are an asset owner of, i.e. their own profile info
assert "EDIT_CONTACT_INFO" not in editor_platform_policy_privileges
assert "EDIT_USER_PROFILE" not in editor_platform_policy_privileges
assert "EDIT_ENTITY_OWNERS" not in editor_platform_policy_privileges
33 changes: 33 additions & 0 deletions .github/scripts/check_python_package.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import setuptools
import os

# Folders whose Python package layout is validated.
folders = ["./smoke-test/tests"]

for folder in folders:
    print(f"Checking folder {folder}")

    # find_packages only reports directories that contain an __init__.py,
    # while find_namespace_packages also reports directories without one.
    # Any difference between the two lists therefore points at a directory
    # whose __init__.py is missing. "cypress" paths are excluded from both.
    packages = [i for i in setuptools.find_packages(folder) if "cypress" not in i]
    namespace_packages = [
        i for i in setuptools.find_namespace_packages(folder) if "cypress" not in i
    ]

    print("Packages found:", packages)
    print("Namespace packages found:", namespace_packages)

    in_packages_not_namespace = set(packages) - set(namespace_packages)
    in_namespace_not_packages = set(namespace_packages) - set(packages)

    if in_packages_not_namespace:
        print(f"Packages not in namespace packages: {in_packages_not_namespace}")
    if in_namespace_not_packages:
        print(f"Namespace packages not in packages: {in_namespace_not_packages}")
        # List each offending directory so the log shows what it contains.
        for pkg in in_namespace_not_packages:
            pkg_path = os.path.join(folder, pkg.replace(".", os.path.sep))
            print(f"Contents of {pkg_path}:")
            print(os.listdir(pkg_path))

    # Fail the check (AssertionError) when the two discovery modes disagree.
    assert (
        len(in_packages_not_namespace) == 0
    ), f"Found packages in {folder} that are not in namespace packages: {in_packages_not_namespace}"
    assert (
        len(in_namespace_not_packages) == 0
    ), f"Found namespace packages in {folder} that are not in packages: {in_namespace_not_packages}"
32 changes: 28 additions & 4 deletions .github/scripts/docker_helpers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,44 @@ export MAIN_BRANCH="master"
export MAIN_BRANCH_TAG="head"

# Print the abbreviated commit id of $GITHUB_SHA as a single line.
# `head -c7` caps the output at 7 bytes regardless of how long an
# abbreviation `git rev-parse --short` prints, so the tag length is stable.
function get_short_sha {
    echo $(git rev-parse --short "$GITHUB_SHA"|head -c7)
}

# Computed once at source time; the tag helpers below read $SHORT_SHA.
export SHORT_SHA=$(get_short_sha)
echo "SHORT_SHA: $SHORT_SHA"

# Print the main Docker tag derived from $GITHUB_REF:
#   refs/heads/<MAIN_BRANCH>  -> <MAIN_BRANCH_TAG>
#   refs/tags/<tag>           -> <tag>
#   refs/pull/<n>/...         -> pr<n>
# Exactly one tag is printed; the commit-specific tag comes from get_unique_tag.
function get_tag {
    echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG},g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1,g')
}

# Same mapping as the main tag, with a "-slim" suffix on every result:
# main branch -> <MAIN_BRANCH_TAG>-slim, tag -> <tag>-slim, PR -> pr<n>-slim.
function get_tag_slim {
    echo $(echo ${GITHUB_REF} | sed \
        -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-slim,g" \
        -e 's,refs/tags/\(.*\),\1-slim,g' \
        -e 's,refs/pull/\([0-9]*\).*,pr\1-slim,g')
}

# Same mapping as the main tag, with a "-full" suffix on every result:
# main branch -> <MAIN_BRANCH_TAG>-full, tag -> <tag>-full, PR -> pr<n>-full.
function get_tag_full {
    echo $(echo ${GITHUB_REF} | sed \
        -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-full,g" \
        -e 's,refs/tags/\(.*\),\1-full,g' \
        -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g')
}

# Print the Python package version to bake into Docker images, derived from
# $GITHUB_REF. Every result carries the "1!" epoch prefix:
#   refs/heads/<MAIN_BRANCH>  -> 1!0.0.0+docker.<SHORT_SHA>
#   refs/tags/v<version>      -> 1!<version>+docker
#   refs/pull/<n>/...         -> 1!0.0.0+docker.pr<n>
function get_python_docker_release_v {
    echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},1!0.0.0+docker.${SHORT_SHA},g" -e 's,refs/tags/v\(.*\),1!\1+docker,g' -e 's,refs/pull/\([0-9]*\).*,1!0.0.0+docker.pr\1,g')
}

# Print a tag that identifies this exact build, derived from $GITHUB_REF:
#   refs/heads/<MAIN_BRANCH>  -> <SHORT_SHA>
#   refs/tags/<tag>           -> <tag>
#   refs/pull/<n>/...         -> pr<n>
function get_unique_tag {
    echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${SHORT_SHA},g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1,g')
}

# Build-specific tag with a "-slim" suffix:
# main branch -> <SHORT_SHA>-slim, tag -> <tag>-slim, PR -> pr<n>-slim.
function get_unique_tag_slim {
    echo $(echo ${GITHUB_REF} | sed \
        -e "s,refs/heads/${MAIN_BRANCH},${SHORT_SHA}-slim,g" \
        -e 's,refs/tags/\(.*\),\1-slim,g' \
        -e 's,refs/pull/\([0-9]*\).*,pr\1-slim,g')
}

# Build-specific tag with a "-full" suffix:
# main branch -> <SHORT_SHA>-full, tag -> <tag>-full, PR -> pr<n>-full.
function get_unique_tag_full {
    echo $(echo ${GITHUB_REF} | sed \
        -e "s,refs/heads/${MAIN_BRANCH},${SHORT_SHA}-full,g" \
        -e 's,refs/tags/\(.*\),\1-full,g' \
        -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g')
}

# Print the comma-separated list of Docker platforms to build for.
# Both amd64 and arm64 are built only for pushes to the main branch;
# every other event or ref builds amd64 only.
#
# Reads the GITHUB_EVENT_NAME and GITHUB_REF environment variables. The
# `${{ ... }}` expression syntax is only expanded inside workflow YAML, so a
# sourced shell script has to use the environment variables instead.
function get_platforms_based_on_branch {
    # Two separate tests: `&&` is not valid inside a single `[ ... ]`.
    if [ "${GITHUB_EVENT_NAME:-}" = 'push' ] && [ "${GITHUB_REF:-}" = "refs/heads/${MAIN_BRANCH}" ]; then
        echo "linux/amd64,linux/arm64"
    else
        echo "linux/amd64"
    fi
}
8 changes: 8 additions & 0 deletions .github/scripts/docker_logs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Dump the logs of every container (running or stopped) into TARGET_DIR,
# one file per container named <container><TEST_STRATEGY>.log.
# Both variables can be preset by the caller; defaults apply when they are
# unset or empty.
: "${TARGET_DIR:=docker_logs}"
: "${TEST_STRATEGY:=}"

mkdir -p "$TARGET_DIR"
for container in $(docker ps -a --format '{{.Names}}'); do
    # stdout and stderr both go to the log file; a failure for one container
    # must not stop collection for the rest.
    docker logs "$container" > "${TARGET_DIR}/${container}${TEST_STRATEGY}.log" 2>&1 || true
done
Loading

0 comments on commit 0c7a412

Please sign in to comment.