diff --git a/.github/actions/ci-optimization/action.yml b/.github/actions/ci-optimization/action.yml index 2f677a0e552c23..ff901b5de04b65 100644 --- a/.github/actions/ci-optimization/action.yml +++ b/.github/actions/ci-optimization/action.yml @@ -1,5 +1,5 @@ -name: 'Identify CI Optimizations' -description: 'Determine if code changes are specific to certain modules.' +name: "Identify CI Optimizations" +description: "Determine if code changes are specific to certain modules." outputs: frontend-only: @@ -44,27 +44,25 @@ outputs: runs: using: "composite" steps: - - uses: dorny/paths-filter@v2 + - uses: dorny/paths-filter@v3 id: filter with: + token: "" # Empty token forces it to use raw git commands. filters: | frontend: - "datahub-frontend/**" - "datahub-web-react/**" - - "smoke-test/tests/cypress/**" - "docker/datahub-frontend/**" ingestion: - "metadata-ingestion-modules/**" - "metadata-ingestion/**" - "metadata-models/**" - - "smoke-test/**" - "docker/datahub-ingestion**" ingestion-base: - "docker/datahub-ingestion-base/**" docker: - "docker/**" backend: - - ".github/**" - "metadata-models/**" - "datahub-upgrade/**" - "entity-registry/**" @@ -78,7 +76,6 @@ runs: - "metadata-utils/**" - "metadata-operation-context/**" - "datahub-graphql-core/**" - - "smoke-test/**" - "docker/**" kafka-setup: - "docker/kafka-setup/**" diff --git a/.github/actions/docker-custom-build-and-push/action.yml b/.github/actions/docker-custom-build-and-push/action.yml index 1c4a777c14802a..3805b3501ccec0 100644 --- a/.github/actions/docker-custom-build-and-push/action.yml +++ b/.github/actions/docker-custom-build-and-push/action.yml @@ -26,10 +26,13 @@ inputs: build-args: description: "List of build-time variables. Same as docker/build-push-action" required: false - tags: - # e.g. latest,head,sha12345 - description: "List of tags to use for the Docker image" + image_tag: + # e.g. 
pr12345 OR head OR v0.1.2.3 + description: "Main tag to use for the Docker image" required: true + flavor: + description: 'Image flavor (e.g., slim, full)' + required: false target: description: "Sets the target stage to build" required: false @@ -45,13 +48,16 @@ runs: steps: - name: Docker meta id: docker_meta - uses: crazy-max/ghaction-docker-meta@v1 + uses: docker/metadata-action@v5 with: - # list of Docker images to use as base name for tags images: ${{ inputs.images }} - # add git short SHA as Docker tag - tag-custom: ${{ inputs.tags }} - tag-custom-only: true + flavor: | + latest=false + suffix=${{ inputs.flavor && format('-{0}', inputs.flavor) || '' }} + tags: | + type=raw,value=${{ inputs.image_tag }} + type=raw,value=head,enable={{is_default_branch}} + type=sha,prefix=,format=short # Code for testing the build when not pushing to Docker Hub. - name: Build and Load image for testing (if not publishing) @@ -74,10 +80,13 @@ runs: if: ${{ inputs.publish != 'true' }} shell: bash run: | + IMAGES=""" + ${{ inputs.images }} + """ TAGS=""" - ${{ steps.docker_meta.outputs.tags }} + ${{ inputs.image_tag }} """ - echo "SINGLE_TAG=$(echo $TAGS | tr '\n' ' ' | awk -F' ' '{ print $1 }')" >> $GITHUB_OUTPUT + echo "SINGLE_TAG=$(echo $IMAGES | tr '\n' ' ' | awk -F' |,' '{ print $1 }'):$(echo $TAGS | tr '\n' ' ' | awk -F' |,' '{ print $1 }')" >> $GITHUB_OUTPUT id: single_tag - name: Upload image locally for testing (if not publishing) uses: ishworkh/docker-image-artifact-upload@v1 diff --git a/.github/scripts/docker_helpers.sh b/.github/scripts/docker_helpers.sh index 0487c69eee0ef4..138c8649820ec5 100755 --- a/.github/scripts/docker_helpers.sh +++ b/.github/scripts/docker_helpers.sh @@ -5,22 +5,22 @@ export MAIN_BRANCH="master" export MAIN_BRANCH_TAG="head" function get_short_sha { - echo $(git rev-parse --short "$GITHUB_SHA") + echo $(git rev-parse --short "$GITHUB_SHA"|head -c7) } export SHORT_SHA=$(get_short_sha) echo "SHORT_SHA: $SHORT_SHA" function get_tag { - echo 
$(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG},g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1,g'),${SHORT_SHA} + echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG},g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1,g') } function get_tag_slim { - echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-slim,g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-slim,g'),${SHORT_SHA}-slim + echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-slim,g" -e 's,refs/tags/\(.*\),\1-slim,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-slim,g') } function get_tag_full { - echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-full,g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g'),${SHORT_SHA}-full + echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-full,g" -e 's,refs/tags/\(.*\),\1-full,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g') } function get_python_docker_release_v { @@ -32,9 +32,19 @@ function get_unique_tag { } function get_unique_tag_slim { - echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${SHORT_SHA}-slim,g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-slim,g') + echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${SHORT_SHA}-slim,g" -e 's,refs/tags/\(.*\),\1-slim,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-slim,g') } function get_unique_tag_full { - echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${SHORT_SHA}-full,g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g') + echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${SHORT_SHA}-full,g" -e 's,refs/tags/\(.*\),\1-full,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g') +} + +# Echo the docker build platforms: multi-arch for pushes to MAIN_BRANCH, linux/amd64 otherwise. +# Reads GITHUB_EVENT_NAME/GITHUB_REF from the environment; workflow expressions are not expanded inside scripts. +function get_platforms_based_on_branch { + if [ "${GITHUB_EVENT_NAME}" == 'push' ] && [ "${GITHUB_REF}" == "refs/heads/${MAIN_BRANCH}" ]; then + echo 
"linux/amd64,linux/arm64" + else + echo "linux/amd64" + fi } diff --git a/.github/scripts/docker_logs.sh b/.github/scripts/docker_logs.sh new file mode 100644 index 00000000000000..918b859fbe5b1d --- /dev/null +++ b/.github/scripts/docker_logs.sh @@ -0,0 +1,8 @@ +TARGET_DIR="${TARGET_DIR:=docker_logs}" +TEST_STRATEGY="${TEST_STRATEGY:=}" + +mkdir -p "$TARGET_DIR" +for name in `docker ps -a --format '{{.Names}}'`; +do + docker logs "$name" >& "${TARGET_DIR}/${name}${TEST_STRATEGY}.log" || true +done \ No newline at end of file diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index c93267947b65a8..b0666f4a42aac8 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -57,6 +57,11 @@ jobs: timeout-minutes: 60 needs: setup steps: + - name: Free up disk space + run: | + sudo apt-get remove 'dotnet-*' azure-cli || true + sudo rm -rf /usr/local/lib/android/ || true + sudo docker image prune -a -f || true - uses: szenius/set-timezone@v1.2 with: timezoneLinux: ${{ matrix.timezone }} diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index 9487e71e8da3d1..32e68a76a88f5a 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -47,7 +47,6 @@ jobs: publish: ${{ steps.publish.outputs.publish }} pr-publish: ${{ steps.pr-publish.outputs.publish }} python_release_version: ${{ steps.tag.outputs.python_release_version }} - short_sha: ${{ steps.tag.outputs.short_sha }} branch_name: ${{ steps.tag.outputs.branch_name }} repository_name: ${{ steps.tag.outputs.repository_name }} frontend_change: ${{ steps.ci-optimize.outputs.frontend-change == 'true' }} @@ -61,6 +60,7 @@ jobs: mysql_setup_change: ${{ steps.ci-optimize.outputs.mysql-setup-change == 'true' }} postgres_setup_change: ${{ steps.ci-optimize.outputs.postgres-setup-change == 'true' }} elasticsearch_setup_change: ${{ steps.ci-optimize.outputs.elasticsearch-setup-change == 
'true' }} + smoke_test_change: ${{ steps.ci-optimize.outputs.smoke-test-change == 'true' }} steps: - name: Check out the repo uses: acryldata/sane-checkout-action@v3 @@ -157,7 +157,7 @@ jobs: with: images: | ${{ env.DATAHUB_GMS_IMAGE }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -221,7 +221,7 @@ jobs: with: images: | ${{ env.DATAHUB_MAE_CONSUMER_IMAGE }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -285,7 +285,7 @@ jobs: with: images: | ${{ env.DATAHUB_MCE_CONSUMER_IMAGE }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -349,7 +349,7 @@ jobs: with: images: | ${{ env.DATAHUB_UPGRADE_IMAGE }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -394,7 +394,7 @@ jobs: name: Build and Push DataHub Frontend Docker Image runs-on: ubuntu-latest needs: setup - if: ${{ needs.setup.outputs.frontend_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.frontend_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true'}} steps: - name: Set up JDK 17 uses: actions/setup-java@v3 @@ -415,7 
+415,7 @@ jobs: with: images: | ${{ env.DATAHUB_FRONTEND_IMAGE }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -469,7 +469,7 @@ jobs: with: images: | ${{ env.DATAHUB_KAFKA_SETUP_IMAGE }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -490,7 +490,7 @@ jobs: with: images: | ${{ env.DATAHUB_MYSQL_SETUP_IMAGE }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -502,7 +502,7 @@ jobs: name: Build and Push DataHub Elasticsearch Setup Docker Image runs-on: ubuntu-latest needs: setup - if: ${{ needs.setup.outputs.elasticsearch_setup_change == 'true' || (needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true') }} + if: ${{ needs.setup.outputs.elasticsearch_setup_change == 'true' || (needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' ) }} steps: - name: Check out the repo uses: acryldata/sane-checkout-action@v3 @@ -511,7 +511,7 @@ jobs: with: images: | ${{ env.DATAHUB_ELASTIC_SETUP_IMAGE }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -525,7 +525,7 @@ jobs: outputs: tag: ${{ steps.tag.outputs.tag }} needs: setup 
- if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} steps: - name: Check out the repo uses: acryldata/sane-checkout-action@v3 @@ -536,7 +536,7 @@ jobs: target: base images: | ${{ env.DATAHUB_INGESTION_BASE_IMAGE }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -552,7 +552,7 @@ jobs: outputs: tag: ${{ steps.tag.outputs.tag }} needs: [setup, datahub_ingestion_base_build] - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} steps: - name: Check out the repo uses: acryldata/sane-checkout-action@v3 @@ -574,7 +574,7 @@ jobs: target: slim-install images: | ${{ env.DATAHUB_INGESTION_BASE_IMAGE }} - tags: ${{ needs.setup.outputs.slim_tag }} + image_tag: ${{ needs.setup.outputs.slim_tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} build-args: | @@ -593,7 +593,7 @@ jobs: outputs: tag: ${{ steps.tag.outputs.tag }} needs: [setup, datahub_ingestion_base_build] - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} steps: - name: Check out the repo uses: acryldata/sane-checkout-action@v3 @@ -636,7 +636,7 @@ jobs: tag: ${{ steps.tag.outputs.tag }} needs_artifact_download: ${{ needs.setup.outputs.ingestion_change == 'true' && ( 
needs.setup.outputs.publish != 'true' && needs.setup.outputs.pr-publish != 'true') }} needs: [setup, datahub_ingestion_base_slim_build] - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} steps: - name: Set up JDK 17 uses: actions/setup-java@v3 @@ -647,7 +647,7 @@ jobs: - name: Check out the repo uses: acryldata/sane-checkout-action@v3 - name: Build codegen - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish =='true' }} run: ./gradlew :metadata-ingestion:codegen - name: Download Base Image uses: ishworkh/docker-image-artifact-download@v1 @@ -661,7 +661,7 @@ jobs: username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} - name: Build and push Slim Image - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} uses: ./.github/actions/docker-custom-build-and-push with: target: final @@ -672,7 +672,7 @@ jobs: DOCKER_VERSION=${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }} RELEASE_VERSION=${{ needs.setup.outputs.python_release_version }} APP_ENV=slim - tags: ${{ needs.setup.outputs.slim_tag }} + image_tag: ${{ needs.setup.outputs.slim_tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -723,7 +723,7 @@ jobs: tag: ${{ steps.tag.outputs.tag }} needs_artifact_download: ${{ 
needs.setup.outputs.ingestion_change == 'true' && ( needs.setup.outputs.publish != 'true' && needs.setup.outputs.pr-publish != 'true' ) }} needs: [setup, datahub_ingestion_base_full_build] - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} steps: - name: Set up JDK 17 uses: actions/setup-java@v3 @@ -734,7 +734,7 @@ jobs: - name: Check out the repo uses: acryldata/sane-checkout-action@v3 - name: Build codegen - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} run: ./gradlew :metadata-ingestion:codegen - name: Download Base Image uses: ishworkh/docker-image-artifact-download@v1 @@ -748,7 +748,7 @@ jobs: username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} - name: Build and push Full Image - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} uses: ./.github/actions/docker-custom-build-and-push with: target: final @@ -758,7 +758,7 @@ jobs: BASE_IMAGE=${{ env.DATAHUB_INGESTION_BASE_IMAGE }} DOCKER_VERSION=${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_tag || 'head' }} RELEASE_VERSION=${{ needs.setup.outputs.python_release_version }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -776,7 +776,7 
@@ jobs: name: "[Monitoring] Scan Datahub Ingestion images for vulnerabilities" runs-on: ubuntu-latest needs: [setup, datahub_ingestion_full_build] - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} steps: - name: Checkout # adding checkout step just to make trivy upload happy uses: acryldata/sane-checkout-action@v3 @@ -814,7 +814,7 @@ jobs: echo 'matrix=["cypress_suite1","cypress_rest"]' >> $GITHUB_OUTPUT elif [ '${{ needs.setup.outputs.ingestion_only }}' == 'true' ]; then echo 'matrix=["no_cypress_suite0","no_cypress_suite1"]' >> $GITHUB_OUTPUT - elif [ '${{ needs.setup.outputs.backend_change }}' == 'true' ]; then + elif [[ '${{ needs.setup.outputs.backend_change }}' == 'true' || '${{ needs.setup.outputs.smoke_test_change }}' == 'true' ]]; then echo 'matrix=["no_cypress_suite0","no_cypress_suite1","cypress_suite1","cypress_rest"]' >> $GITHUB_OUTPUT else echo 'matrix=[]' >> $GITHUB_OUTPUT @@ -862,11 +862,6 @@ jobs: with: python-version: "3.10" cache: "pip" - - name: Install dependencies - run: ./metadata-ingestion/scripts/install_deps.sh - - name: Build datahub cli - run: | - ./gradlew :metadata-ingestion:install - name: Login to DockerHub uses: docker/login-action@v3 if: ${{ needs.setup.outputs.docker-login == 'true' }} @@ -965,7 +960,7 @@ jobs: echo 'datahub-ingestion head-slim images' docker pull '${{ env.DATAHUB_INGESTION_IMAGE }}:head-slim' if [ '${{ needs.datahub_ingestion_slim_build.outputs.tag || 'head-slim' }}' != 'head-slim' ]; then - docker tag '${{ env.DATAHUB_INGESTION_IMAGE }}:head-slim' '${{ env.DATAHUB_INGESTION_IMAGE }}:${{ needs.datahub_ingestion_slim_build.outputs.tag }}' + docker tag '${{ env.DATAHUB_INGESTION_IMAGE }}:head-slim' '${{ env.DATAHUB_INGESTION_IMAGE }}:${{ needs.setup.outputs.unique_tag }}' fi fi - name: Disk Check @@ -994,6 +989,15 @@ 
jobs: } } }' + - name: Disk Check + run: df -h . && docker images + - name: Install dependencies + run: ./metadata-ingestion/scripts/install_deps.sh + - name: Build datahub cli + run: | + ./gradlew :metadata-ingestion:install + - name: Disk Check + run: df -h . && docker images - name: Remove Source Code run: find ./*/* ! -path "./metadata-ingestion*" ! -path "./smoke-test*" ! -path "./gradle*" -delete - name: Disk Check @@ -1014,21 +1018,14 @@ jobs: if: failure() run: | docker ps -a - docker logs datahub-datahub-gms-1 >& gms-${{ matrix.test_strategy }}.log || true - docker logs datahub-datahub-actions-1 >& actions-${{ matrix.test_strategy }}.log || true - docker logs datahub-datahub-mae-consumer-1 >& mae-${{ matrix.test_strategy }}.log || true - docker logs datahub-datahub-mce-consumer-1 >& mce-${{ matrix.test_strategy }}.log || true - docker logs datahub-broker-1 >& broker-${{ matrix.test_strategy }}.log || true - docker logs datahub-mysql-1 >& mysql-${{ matrix.test_strategy }}.log || true - docker logs datahub-elasticsearch-1 >& elasticsearch-${{ matrix.test_strategy }}.log || true - docker logs datahub-datahub-frontend-react-1 >& frontend-${{ matrix.test_strategy }}.log || true - docker logs datahub-upgrade-1 >& upgrade-${{ matrix.test_strategy }}.log || true + TEST_STRATEGY="-${{ matrix.test_strategy }}" + source .github/scripts/docker_logs.sh - name: Upload logs uses: actions/upload-artifact@v3 if: failure() with: name: docker logs - path: "*.log" + path: "docker_logs/*.log" - name: Upload screenshots uses: actions/upload-artifact@v3 if: failure() @@ -1049,7 +1046,7 @@ jobs: runs-on: ubuntu-latest needs: [setup, smoke_test] steps: - - uses: aws-actions/configure-aws-credentials@v1 + - uses: aws-actions/configure-aws-credentials@v4 if: ${{ needs.setup.outputs.publish != 'false' && github.repository_owner == 'datahub-project' && needs.setup.outputs.repository_name == 'datahub' }} with: aws-access-key-id: ${{ secrets.AWS_SQS_ACCESS_KEY_ID }} diff --git 
a/.github/workflows/lint-actions.yml b/.github/workflows/lint-actions.yml index 4d83adbeba08a1..8a1777522f416b 100644 --- a/.github/workflows/lint-actions.yml +++ b/.github/workflows/lint-actions.yml @@ -14,3 +14,8 @@ jobs: - uses: reviewdog/action-actionlint@v1 with: reporter: github-pr-review + permissions: + contents: read + checks: write + pull-requests: write + issues: write diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml index 51b97552eb150a..a27013c4bf4887 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -46,6 +46,11 @@ jobs: - python-version: "3.10" fail-fast: false steps: + - name: Free up disk space + run: | + sudo apt-get remove 'dotnet-*' azure-cli || true + sudo rm -rf /usr/local/lib/android/ || true + sudo docker image prune -a -f || true - name: Set up JDK 17 uses: actions/setup-java@v3 with: diff --git a/.github/workflows/metadata-io.yml b/.github/workflows/metadata-io.yml index 6797c7ad67c0b6..332330b4ed8984 100644 --- a/.github/workflows/metadata-io.yml +++ b/.github/workflows/metadata-io.yml @@ -47,6 +47,11 @@ jobs: timeout-minutes: 60 needs: setup steps: + - name: Free up disk space + run: | + sudo apt-get remove 'dotnet-*' azure-cli || true + sudo rm -rf /usr/local/lib/android/ || true + sudo docker image prune -a -f || true - uses: acryldata/sane-checkout-action@v3 - name: Set up JDK 17 uses: actions/setup-java@v3 diff --git a/.github/workflows/metadata-model.yml b/.github/workflows/metadata-model.yml index 558b7c80f727c1..d62c03057db3f0 100644 --- a/.github/workflows/metadata-model.yml +++ b/.github/workflows/metadata-model.yml @@ -49,7 +49,7 @@ jobs: run: ./gradlew :metadata-ingestion:modelDocGen - name: Configure AWS Credentials if: ${{ needs.setup.outputs.publish == 'true' }} - uses: aws-actions/configure-aws-credentials@v3 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ 
secrets.ACRYL_CI_ARTIFACTS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.ACRYL_CI_ARTIFACTS_ACCESS_KEY }} diff --git a/.github/workflows/pr-labeler.yml b/.github/workflows/pr-labeler.yml index 77874abedaabd0..52f83b3be5283d 100644 --- a/.github/workflows/pr-labeler.yml +++ b/.github/workflows/pr-labeler.yml @@ -30,12 +30,10 @@ jobs: "treff7es", "yoonhyejin", "eboneil", - "ethan-cartwright", "gabe-lyons", "hsheth2", "jjoyce0510", "maggiehays", - "mrjefflewis", "pedro93", "RyanHolstien", "Kunal-kankriya", @@ -45,7 +43,8 @@ jobs: "kushagra-apptware", "Salman-Apptware", "mayurinehate", - "noggi" + "noggi", + "skrydal" ]'), github.actor ) @@ -60,7 +59,6 @@ jobs: ${{ contains( fromJson('[ - "skrydal", "siladitya2", "sgomezvillamor", "ngamanda", diff --git a/.github/workflows/publish-datahub-jars.yml b/.github/workflows/publish-datahub-jars.yml index 7137302c73564c..aceee756339ada 100644 --- a/.github/workflows/publish-datahub-jars.yml +++ b/.github/workflows/publish-datahub-jars.yml @@ -45,6 +45,9 @@ jobs: echo "tag=$TAG" >> $GITHUB_OUTPUT publish: runs-on: ubuntu-latest + permissions: + id-token: write + contents: read needs: ["check-secret", "setup"] if: ${{ needs.check-secret.outputs.publish-enabled == 'true' }} steps: diff --git a/.github/workflows/spark-smoke-test.yml b/.github/workflows/spark-smoke-test.yml index 8ffc8420ba9413..d1618c65285773 100644 --- a/.github/workflows/spark-smoke-test.yml +++ b/.github/workflows/spark-smoke-test.yml @@ -44,8 +44,11 @@ jobs: run: ./metadata-ingestion/scripts/install_deps.sh - name: Disk Check run: df -h . && docker images - - name: Remove images - run: docker image prune -a -f || true + - name: Free up disk space + run: | + sudo apt-get remove 'dotnet-*' azure-cli || true + sudo rm -rf /usr/local/lib/android/ || true + sudo docker image prune -a -f || true - name: Disk Check run: df -h . 
&& docker images - name: Smoke test diff --git a/.github/workflows/test-results.yml b/.github/workflows/test-results.yml index c94a5fc340f473..a122ef3835f4d7 100644 --- a/.github/workflows/test-results.yml +++ b/.github/workflows/test-results.yml @@ -10,6 +10,11 @@ jobs: unit-test-results: name: Unit Test Results runs-on: ubuntu-latest + permissions: + contents: read + actions: read + checks: write + issues: read if: github.event.workflow_run.conclusion != 'skipped' steps: diff --git a/README.md b/README.md index b3c2e2d5459410..8aa177c3d66754 100644 --- a/README.md +++ b/README.md @@ -138,7 +138,7 @@ Here are the companies that have officially adopted DataHub. Please feel free to - [Peloton](https://www.onepeloton.com) - [PITS Global Data Recovery Services](https://www.pitsdatarecovery.net/) - [Razer](https://www.razer.com) -- [Saxo Bank](https://www.home.saxo) +- [Rippling](https://www.rippling.com/) - [Showroomprive](https://www.showroomprive.com/) - [SpotHero](https://spothero.com) - [Stash](https://www.stash.com) @@ -154,6 +154,7 @@ Here are the companies that have officially adopted DataHub. Please feel free to - [Zynga](https://www.zynga.com) + ## Select Articles & Talks - [DataHub Blog](https://blog.datahubproject.io/) @@ -174,6 +175,23 @@ Here are the companies that have officially adopted DataHub. Please feel free to See the full list [here](docs/links.md). +## Security Notes + +### Multi-Component + +The DataHub project uses a wide range of code which is responsible for build automation, documentation generation, and +includes both service (i.e. GMS) and client (i.e. ingestion) components. When evaluating security vulnerabilities in +upstream dependencies, it is important to consider which component is affected and how it is used in the project. 
For example, an +upstream JavaScript library may include a Denial of Service (DoS) vulnerability; however, when used for generating +documentation, it does not affect the running of DataHub itself and cannot be used to impact DataHub's service. Similarly, +Python dependencies for ingestion are part of the DataHub client and are not exposed as a service. + +### Known False Positives + +DataHub's ingestion client does not include credentials in the code repository, Python package, or Docker images. +Upstream Python dependencies may include files that look like credentials and are often misinterpreted as credentials +by automated scanners. + ## License [Apache License 2.0](./LICENSE). diff --git a/datahub-frontend/app/auth/sso/oidc/OidcConfigs.java b/datahub-frontend/app/auth/sso/oidc/OidcConfigs.java index 753edaf89d988f..080ca236630bf3 100644 --- a/datahub-frontend/app/auth/sso/oidc/OidcConfigs.java +++ b/datahub-frontend/app/auth/sso/oidc/OidcConfigs.java @@ -41,6 +41,8 @@ public class OidcConfigs extends SsoConfigs { public static final String OIDC_EXTRACT_JWT_ACCESS_TOKEN_CLAIMS = "auth.oidc.extractJwtAccessTokenClaims"; public static final String OIDC_PREFERRED_JWS_ALGORITHM = "auth.oidc.preferredJwsAlgorithm"; + public static final String OIDC_GRANT_TYPE = "auth.oidc.grantType"; + public static final String OIDC_ACR_VALUES = "auth.oidc.acrValues"; /** Default values */ private static final String DEFAULT_OIDC_USERNAME_CLAIM = "email"; @@ -75,7 +77,9 @@ public class OidcConfigs extends SsoConfigs { private final Optional customParamResource; private final String readTimeout; private final Optional extractJwtAccessTokenClaims; - private Optional preferredJwsAlgorithm; + private final Optional preferredJwsAlgorithm; + private final Optional grantType; + private final Optional acrValues; public OidcConfigs(Builder builder) { super(builder); @@ -98,6 +102,8 @@ public OidcConfigs(Builder builder) { this.readTimeout = builder.readTimeout; this.extractJwtAccessTokenClaims 
= builder.extractJwtAccessTokenClaims; this.preferredJwsAlgorithm = builder.preferredJwsAlgorithm; + this.acrValues = builder.acrValues; + this.grantType = builder.grantType; } public static class Builder extends SsoConfigs.Builder { @@ -123,6 +129,8 @@ public static class Builder extends SsoConfigs.Builder { private String readTimeout = DEFAULT_OIDC_READ_TIMEOUT; private Optional extractJwtAccessTokenClaims = Optional.empty(); private Optional preferredJwsAlgorithm = Optional.empty(); + private Optional grantType = Optional.empty(); + private Optional acrValues = Optional.empty(); public Builder from(final com.typesafe.config.Config configs) { super.from(configs); @@ -169,6 +177,8 @@ public Builder from(final com.typesafe.config.Config configs) { getOptional(configs, OIDC_EXTRACT_JWT_ACCESS_TOKEN_CLAIMS).map(Boolean::parseBoolean); preferredJwsAlgorithm = Optional.ofNullable(getOptional(configs, OIDC_PREFERRED_JWS_ALGORITHM, null)); + grantType = Optional.ofNullable(getOptional(configs, OIDC_GRANT_TYPE, null)); + acrValues = Optional.ofNullable(getOptional(configs, OIDC_ACR_VALUES, null)); return this; } diff --git a/datahub-frontend/app/auth/sso/oidc/OidcProvider.java b/datahub-frontend/app/auth/sso/oidc/OidcProvider.java index 39a65a46cbf919..a8a3205e8299c8 100644 --- a/datahub-frontend/app/auth/sso/oidc/OidcProvider.java +++ b/datahub-frontend/app/auth/sso/oidc/OidcProvider.java @@ -3,6 +3,8 @@ import auth.sso.SsoProvider; import auth.sso.oidc.custom.CustomOidcClient; import com.google.common.collect.ImmutableMap; +import java.util.HashMap; +import java.util.Map; import lombok.extern.slf4j.Slf4j; import org.pac4j.core.client.Client; import org.pac4j.core.http.callback.PathParameterCallbackUrlResolver; @@ -64,9 +66,19 @@ private Client createPac4jClient() { _oidcConfigs.getResponseType().ifPresent(oidcConfiguration::setResponseType); _oidcConfigs.getResponseMode().ifPresent(oidcConfiguration::setResponseMode); 
_oidcConfigs.getUseNonce().ifPresent(oidcConfiguration::setUseNonce); + Map customParamsMap = new HashMap<>(); _oidcConfigs .getCustomParamResource() - .ifPresent(value -> oidcConfiguration.setCustomParams(ImmutableMap.of("resource", value))); + .ifPresent(value -> customParamsMap.put("resource", value)); + _oidcConfigs + .getGrantType() + .ifPresent(value -> customParamsMap.put("grant_type", value)); + _oidcConfigs + .getAcrValues() + .ifPresent(value -> customParamsMap.put("acr_values", value)); + if (!customParamsMap.isEmpty()) { + oidcConfiguration.setCustomParams(customParamsMap); + } _oidcConfigs .getPreferredJwsAlgorithm() .ifPresent( diff --git a/datahub-frontend/conf/application.conf b/datahub-frontend/conf/application.conf index dc243ecadafd82..63ff2c9166fbc9 100644 --- a/datahub-frontend/conf/application.conf +++ b/datahub-frontend/conf/application.conf @@ -186,6 +186,8 @@ auth.oidc.customParam.resource = ${?AUTH_OIDC_CUSTOM_PARAM_RESOURCE} auth.oidc.readTimeout = ${?AUTH_OIDC_READ_TIMEOUT} auth.oidc.extractJwtAccessTokenClaims = ${?AUTH_OIDC_EXTRACT_JWT_ACCESS_TOKEN_CLAIMS} # Whether to extract claims from JWT access token. Defaults to false. auth.oidc.preferredJwsAlgorithm = ${?AUTH_OIDC_PREFERRED_JWS_ALGORITHM} # Which jws algorithm to use +auth.oidc.acrValues = ${?AUTH_OIDC_ACR_VALUES} +auth.oidc.grantType = ${?AUTH_OIDC_GRANT_TYPE} # # By default, the callback URL that should be registered with the identity provider is computed as {$baseUrl}/callback/oidc. 
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index db9bf304a1085b..b470da3c7c74aa 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -2728,9 +2728,11 @@ private void configureFormResolvers(final RuntimeWiring.Builder builder) { corpUserType, (env) -> { final FormActorAssignment actors = env.getSource(); - return actors.getUsers().stream() - .map(CorpUser::getUrn) - .collect(Collectors.toList()); + return actors.getUsers() != null + ? actors.getUsers().stream() + .map(CorpUser::getUrn) + .collect(Collectors.toList()) + : null; })) .dataFetcher( "groups", @@ -2738,9 +2740,11 @@ private void configureFormResolvers(final RuntimeWiring.Builder builder) { corpGroupType, (env) -> { final FormActorAssignment actors = env.getSource(); - return actors.getGroups().stream() - .map(CorpGroup::getUrn) - .collect(Collectors.toList()); + return actors.getGroups() != null + ? 
actors.getGroups().stream() + .map(CorpGroup::getUrn) + .collect(Collectors.toList()) + : null; })) .dataFetcher("isAssignedToMe", new IsFormAssignedToMeResolver(groupService))); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetChartsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetChartsResolver.java index 767c9b4d4e71bc..4847aea224ccd6 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetChartsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetChartsResolver.java @@ -159,9 +159,10 @@ private SearchResult searchForNewUsers(@Nonnull final OperationContext opContext .setValue( String.valueOf( trailingMonthDateRange.getStart())))))))), - new SortCriterion() - .setField(CORP_USER_STATUS_LAST_MODIFIED_FIELD_NAME) - .setOrder(SortOrder.DESCENDING), + Collections.singletonList( + new SortCriterion() + .setField(CORP_USER_STATUS_LAST_MODIFIED_FIELD_NAME) + .setOrder(SortOrder.DESCENDING)), 0, 100); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetMetadataAnalyticsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetMetadataAnalyticsResolver.java index 01f2e6c8462e39..6045b1e726c7a5 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetMetadataAnalyticsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetMetadataAnalyticsResolver.java @@ -77,7 +77,8 @@ private List getCharts(MetadataAnalyticsInput input, OperationCo } SearchResult searchResult = - _entityClient.searchAcrossEntities(opContext, entities, query, filter, 0, 0, null, null); + _entityClient.searchAcrossEntities( + opContext, entities, query, filter, 0, 0, Collections.emptyList(), null); List aggregationMetadataList 
= searchResult.getMetadata().getAggregations(); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java index 85a2c09ed79a71..167515a13c4da2 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java @@ -21,5 +21,6 @@ public class FeatureFlags { private boolean schemaFieldEntityFetchEnabled = false; private boolean businessAttributeEntityEnabled = false; private boolean dataContractsEnabled = false; + private boolean editableDatasetNameEnabled = false; private boolean showSeparateSiblings = false; } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/DebugAccessResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/DebugAccessResolver.java index 44604e92c35ded..8372b6b5126a3e 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/DebugAccessResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/DebugAccessResolver.java @@ -33,6 +33,7 @@ import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -181,7 +182,7 @@ private Set getPoliciesFor( Constants.POLICY_ENTITY_NAME, "", buildFilterToGetPolicies(user, groups, roles), - sortCriterion, + Collections.singletonList(sortCriterion), 0, 10000) .getEntities() diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolver.java index dc57ed3c673c16..e0ecebbbc7bc2e 100644 --- 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolver.java @@ -59,10 +59,11 @@ public CompletableFuture get(DataFetchingEnvironment envi if (AuthorizationUtils.canManageTokens(context) || isListingSelfTokens(filters, context)) { try { - final SortCriterion sortCriterion = - new SortCriterion() - .setField(EXPIRES_AT_FIELD_NAME) - .setOrder(SortOrder.DESCENDING); + final List sortCriteria = + Collections.singletonList( + new SortCriterion() + .setField(EXPIRES_AT_FIELD_NAME) + .setOrder(SortOrder.DESCENDING)); final SearchResult searchResult = _entityClient.search( context @@ -74,7 +75,7 @@ public CompletableFuture get(DataFetchingEnvironment envi filters, Collections.emptyList(), context.getOperationContext().getAspectRetriever()), - sortCriterion, + sortCriteria, start, count); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java index fb1672d54dc971..259d05c631557d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java @@ -186,6 +186,7 @@ public CompletableFuture get(final DataFetchingEnvironment environmen .setNestedDomainsEnabled(_featureFlags.isNestedDomainsEnabled()) .setPlatformBrowseV2(_featureFlags.isPlatformBrowseV2()) .setDataContractsEnabled(_featureFlags.isDataContractsEnabled()) + .setEditableDatasetNameEnabled(_featureFlags.isEditableDatasetNameEnabled()) .setShowSeparateSiblings(_featureFlags.isShowSeparateSiblings()) .build(); diff --git 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolver.java index 15927eef236cab..5a3207633c07c8 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolver.java @@ -19,6 +19,7 @@ import com.linkedin.metadata.query.filter.Filter; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; +import java.util.Collections; import java.util.List; import java.util.concurrent.CompletableFuture; import lombok.extern.slf4j.Slf4j; @@ -92,7 +93,7 @@ public CompletableFuture get(final DataFetchingEnvironment enviro new CriterionArray(ImmutableList.of(filterCriterion))))), start, count, - null, + Collections.emptyList(), null)); } catch (Exception e) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java index 75796f637525e5..6a880503802cb4 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java @@ -19,6 +19,7 @@ import com.linkedin.metadata.query.filter.Filter; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; +import java.util.Collections; import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; @@ -98,7 +99,7 @@ public CompletableFuture get(final DataFetchingEnvironment enviro new ConjunctiveCriterion().setAnd(criteria))), start, count, - null, + 
Collections.emptyList(), null)); } catch (Exception e) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/ListDomainsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/ListDomainsResolver.java index 0c16470c642b71..e6d4238bc70546 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/ListDomainsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/ListDomainsResolver.java @@ -22,6 +22,7 @@ import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; @@ -66,9 +67,10 @@ public CompletableFuture get(final DataFetchingEnvironment en Constants.DOMAIN_ENTITY_NAME, query, filter, - new SortCriterion() - .setField(DOMAIN_CREATED_TIME_INDEX_FIELD_NAME) - .setOrder(SortOrder.DESCENDING), + Collections.singletonList( + new SortCriterion() + .setField(DOMAIN_CREATED_TIME_INDEX_FIELD_NAME) + .setOrder(SortOrder.DESCENDING)), start, count); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/ListGroupsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/ListGroupsResolver.java index fce404a6baa16b..0632af68998dc9 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/ListGroupsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/ListGroupsResolver.java @@ -21,6 +21,7 @@ import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; import java.util.ArrayList; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -64,9 +65,10 @@ public CompletableFuture get(final DataFetchingEnvironment 
env CORP_GROUP_ENTITY_NAME, query, null, - new SortCriterion() - .setField(CORP_GROUP_CREATED_TIME_INDEX_FIELD_NAME) - .setOrder(SortOrder.DESCENDING), + Collections.singletonList( + new SortCriterion() + .setField(CORP_GROUP_CREATED_TIME_INDEX_FIELD_NAME) + .setOrder(SortOrder.DESCENDING)), start, count); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/EntityIncidentsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/EntityIncidentsResolver.java index 2d4b24243073a5..d79634c27d881c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/EntityIncidentsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/EntityIncidentsResolver.java @@ -21,6 +21,7 @@ import graphql.schema.DataFetchingEnvironment; import java.net.URISyntaxException; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -60,13 +61,13 @@ public CompletableFuture get(DataFetchingEnvironment envi // Index! // We use the search index so that we can easily sort by the last updated time. 
final Filter filter = buildIncidentsEntityFilter(entityUrn, maybeState); - final SortCriterion sortCriterion = buildIncidentsSortCriterion(); + final List sortCriteria = buildIncidentsSortCriteria(); final SearchResult searchResult = _entityClient.filter( context.getOperationContext(), Constants.INCIDENT_ENTITY_NAME, filter, - sortCriterion, + sortCriteria, start, count); @@ -118,10 +119,10 @@ private Filter buildIncidentsEntityFilter( return QueryUtils.newFilter(criterionMap); } - private SortCriterion buildIncidentsSortCriterion() { + private List buildIncidentsSortCriteria() { final SortCriterion sortCriterion = new SortCriterion(); sortCriterion.setField(CREATED_TIME_SEARCH_INDEX_FIELD_NAME); sortCriterion.setOrder(SortOrder.DESCENDING); - return sortCriterion; + return Collections.singletonList(sortCriterion); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/IngestionSourceExecutionRequestsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/IngestionSourceExecutionRequestsResolver.java index 4a3b75deddc452..a4c2ab42227d9c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/IngestionSourceExecutionRequestsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/IngestionSourceExecutionRequestsResolver.java @@ -23,6 +23,7 @@ import com.linkedin.metadata.search.SearchResult; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; +import java.util.Collections; import java.util.Map; import java.util.Objects; import java.util.Set; @@ -76,9 +77,10 @@ public CompletableFuture get( new ConjunctiveCriterion() .setAnd( new CriterionArray(ImmutableList.of(filterCriterion))))), - new SortCriterion() - .setField(REQUEST_TIME_MS_FIELD_NAME) - .setOrder(SortOrder.DESCENDING), + Collections.singletonList( + new SortCriterion() + 
.setField(REQUEST_TIME_MS_FIELD_NAME) + .setOrder(SortOrder.DESCENDING)), start, count); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/ListSecretsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/ListSecretsResolver.java index 106a2d0d1e18e2..bf8d7c800ccae6 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/ListSecretsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/ListSecretsResolver.java @@ -26,6 +26,7 @@ import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; import java.util.ArrayList; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -73,9 +74,10 @@ public CompletableFuture get(final DataFetchingEnvironment en Constants.SECRETS_ENTITY_NAME, query, null, - new SortCriterion() - .setField(DOMAIN_CREATED_TIME_INDEX_FIELD_NAME) - .setOrder(SortOrder.DESCENDING), + Collections.singletonList( + new SortCriterion() + .setField(DOMAIN_CREATED_TIME_INDEX_FIELD_NAME) + .setOrder(SortOrder.DESCENDING)), start, count); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/DataJobRunsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/DataJobRunsResolver.java index 591712ef3f55ba..09039e530631d0 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/DataJobRunsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/DataJobRunsResolver.java @@ -26,6 +26,7 @@ import graphql.schema.DataFetchingEnvironment; import java.net.URISyntaxException; import java.util.ArrayList; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -61,13 +62,13 @@ public CompletableFuture 
get(DataFetchingEnvironment // Index! // We use the search index so that we can easily sort by the last updated time. final Filter filter = buildTaskRunsEntityFilter(entityUrn); - final SortCriterion sortCriterion = buildTaskRunsSortCriterion(); + final List sortCriteria = buildTaskRunsSortCriteria(); final SearchResult gmsResult = _entityClient.filter( context.getOperationContext(), Constants.DATA_PROCESS_INSTANCE_ENTITY_NAME, filter, - sortCriterion, + sortCriteria, start, count); final List dataProcessInstanceUrns = @@ -123,10 +124,10 @@ private Filter buildTaskRunsEntityFilter(final String entityUrn) { return filter; } - private SortCriterion buildTaskRunsSortCriterion() { + private List buildTaskRunsSortCriteria() { final SortCriterion sortCriterion = new SortCriterion(); sortCriterion.setField(CREATED_TIME_SEARCH_INDEX_FIELD_NAME); sortCriterion.setOrder(SortOrder.DESCENDING); - return sortCriterion; + return Collections.singletonList(sortCriterion); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/EntityRunsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/EntityRunsResolver.java index 163fc30fb6e6c6..82c5b73d871525 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/EntityRunsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/EntityRunsResolver.java @@ -27,6 +27,7 @@ import graphql.schema.DataFetchingEnvironment; import java.net.URISyntaxException; import java.util.ArrayList; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -67,13 +68,13 @@ public CompletableFuture get(DataFetchingEnvironment // Index! // We use the search index so that we can easily sort by the last updated time. 
final Filter filter = buildTaskRunsEntityFilter(entityUrn, direction); - final SortCriterion sortCriterion = buildTaskRunsSortCriterion(); + final List sortCriteria = buildTaskRunsSortCriteria(); final SearchResult gmsResult = _entityClient.filter( context.getOperationContext(), Constants.DATA_PROCESS_INSTANCE_ENTITY_NAME, filter, - sortCriterion, + sortCriteria, start, count); final List dataProcessInstanceUrns = @@ -133,10 +134,10 @@ private Filter buildTaskRunsEntityFilter( return filter; } - private SortCriterion buildTaskRunsSortCriterion() { + private List buildTaskRunsSortCriteria() { final SortCriterion sortCriterion = new SortCriterion(); sortCriterion.setField(CREATED_TIME_SEARCH_INDEX_FIELD_NAME); sortCriterion.setOrder(SortOrder.DESCENDING); - return sortCriterion; + return Collections.singletonList(sortCriterion); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/MutationUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/MutationUtils.java index 8b798b243ca864..7608007e9dda98 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/MutationUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/MutationUtils.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.mutate; import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.SystemMetadataUtils.createDefaultSystemMetadata; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; @@ -84,7 +85,7 @@ private static MetadataChangeProposal setProposalProperties( proposal.setChangeType(ChangeType.UPSERT); // Assumes proposal is generated first from the builder methods above so SystemMetadata is empty - SystemMetadata systemMetadata = new SystemMetadata(); + SystemMetadata systemMetadata = createDefaultSystemMetadata(); StringMap properties = new StringMap(); 
properties.put(APP_SOURCE, UI_SOURCE); systemMetadata.setProperties(properties); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateNameResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateNameResolver.java index 1d90720fc69023..ad6dbbe635ed1f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateNameResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateNameResolver.java @@ -4,9 +4,11 @@ import static com.linkedin.datahub.graphql.resolvers.mutate.MutationUtils.persistAspect; import com.linkedin.businessattribute.BusinessAttributeInfo; +import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.CorpuserUrn; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.SetMode; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; @@ -20,6 +22,7 @@ import com.linkedin.datahub.graphql.resolvers.mutate.util.DomainUtils; import com.linkedin.datahub.graphql.resolvers.mutate.util.GlossaryUtils; import com.linkedin.dataproduct.DataProductProperties; +import com.linkedin.dataset.EditableDatasetProperties; import com.linkedin.domain.DomainProperties; import com.linkedin.domain.Domains; import com.linkedin.entity.client.EntityClient; @@ -70,6 +73,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw return updateDataProductName(targetUrn, input, context); case Constants.BUSINESS_ATTRIBUTE_ENTITY_NAME: return updateBusinessAttributeName(targetUrn, input, environment.getContext()); + case Constants.DATASET_ENTITY_NAME: + return updateDatasetName(targetUrn, input, environment.getContext()); default: throw new RuntimeException( String.format( @@ -236,6 +241,37 @@ 
private Boolean updateGroupName(Urn targetUrn, UpdateNameInput input, QueryConte "Unauthorized to perform this action. Please contact your DataHub administrator."); } + // udpates editable dataset properties aspect's name field + private Boolean updateDatasetName(Urn targetUrn, UpdateNameInput input, QueryContext context) { + if (AuthorizationUtils.canEditProperties(targetUrn, context)) { + try { + if (input.getName() != null) { + final EditableDatasetProperties editableDatasetProperties = + new EditableDatasetProperties(); + editableDatasetProperties.setName(input.getName()); + final AuditStamp auditStamp = new AuditStamp(); + Urn actor = UrnUtils.getUrn(context.getActorUrn()); + auditStamp.setActor(actor, SetMode.IGNORE_NULL); + auditStamp.setTime(System.currentTimeMillis()); + editableDatasetProperties.setLastModified(auditStamp); + persistAspect( + context.getOperationContext(), + targetUrn, + Constants.EDITABLE_DATASET_PROPERTIES_ASPECT_NAME, + editableDatasetProperties, + actor, + _entityService); + } + return true; + } catch (Exception e) { + throw new RuntimeException( + String.format("Failed to perform update against input %s", input), e); + } + } + throw new AuthorizationException( + "Unauthorized to perform this action. 
Please contact your DataHub administrator."); + } + private Boolean updateDataProductName( Urn targetUrn, UpdateNameInput input, QueryContext context) { try { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java index 29056eb71a7a3a..ddb795189c0e3d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java @@ -171,7 +171,7 @@ public static boolean isOwnerEqual( if (!owner.getOwner().equals(ownerUrn)) { return false; } - if (owner.getTypeUrn() != null) { + if (owner.getTypeUrn() != null && ownershipTypeUrn != null) { return owner.getTypeUrn().equals(ownershipTypeUrn); } if (ownershipTypeUrn == null) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/ListOwnershipTypesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/ListOwnershipTypesResolver.java index 9f6951e44dd735..da0d5dd07a94f0 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/ListOwnershipTypesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/ListOwnershipTypesResolver.java @@ -67,7 +67,7 @@ public CompletableFuture get(DataFetchingEnvironment e filters, Collections.emptyList(), context.getOperationContext().getAspectRetriever()), - DEFAULT_SORT_CRITERION, + Collections.singletonList(DEFAULT_SORT_CRITERION), start, count); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/ListPostsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/ListPostsResolver.java index 12e4047c2dc4e5..dc7797882371b7 100644 --- 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/ListPostsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/ListPostsResolver.java @@ -18,7 +18,9 @@ import com.linkedin.metadata.search.SearchResult; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; +import java.util.Collections; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; @@ -49,10 +51,11 @@ public CompletableFuture get(final DataFetchingEnvironment envi return GraphQLConcurrencyUtils.supplyAsync( () -> { try { - final SortCriterion sortCriterion = - new SortCriterion() - .setField(LAST_MODIFIED_FIELD_NAME) - .setOrder(SortOrder.DESCENDING); + final List sortCriteria = + Collections.singletonList( + new SortCriterion() + .setField(LAST_MODIFIED_FIELD_NAME) + .setOrder(SortOrder.DESCENDING)); // First, get all Post Urns. 
final SearchResult gmsResult = @@ -61,7 +64,7 @@ public CompletableFuture get(final DataFetchingEnvironment envi POST_ENTITY_NAME, query, null, - sortCriterion, + sortCriteria, start, count); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolver.java index 95be3a68e895c8..aa411f019a4c08 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolver.java @@ -61,8 +61,9 @@ public CompletableFuture get(final DataFetchingEnvironment en return GraphQLConcurrencyUtils.supplyAsync( () -> { try { - final SortCriterion sortCriterion = - new SortCriterion().setField(CREATED_AT_FIELD).setOrder(SortOrder.DESCENDING); + final List sortCriteria = + Collections.singletonList( + new SortCriterion().setField(CREATED_AT_FIELD).setOrder(SortOrder.DESCENDING)); // First, get all Query Urns. 
final SearchResult gmsResult = @@ -74,7 +75,7 @@ public CompletableFuture get(final DataFetchingEnvironment en QUERY_ENTITY_NAME, query, buildFilters(input, context.getOperationContext().getAspectRetriever()), - sortCriterion, + sortCriteria, start, count); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolver.java index 04a72b14eeb021..19bccaf2650866 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolver.java @@ -22,6 +22,7 @@ import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; @@ -94,7 +95,7 @@ public CompletableFuture get(DataFetchingEnvironment environme : inputFilter, 0, 0, // 0 entity count because we don't want resolved entities - null, + Collections.emptyList(), facets)); } catch (Exception e) { log.error( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/GetQuickFiltersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/GetQuickFiltersResolver.java index a61d9111321ca7..b07e3fa9126412 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/GetQuickFiltersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/GetQuickFiltersResolver.java @@ -25,6 +25,7 @@ import graphql.schema.DataFetchingEnvironment; import io.datahubproject.metadata.context.OperationContext; import java.util.ArrayList; +import 
java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.Optional; @@ -107,7 +108,7 @@ private SearchResult getSearchResults( : null, 0, 0, - null, + Collections.emptyList(), null); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java index 287e339ddee50c..0dbed92b7d58e7 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java @@ -18,8 +18,10 @@ import com.linkedin.view.DataHubViewInfo; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; +import java.util.Collections; import java.util.List; import java.util.concurrent.CompletableFuture; +import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -65,10 +67,24 @@ public CompletableFuture get(DataFetchingEnvironment environment) context.getOperationContext().getAspectRetriever()); SearchFlags searchFlags = mapInputFlags(context, input.getSearchFlags()); - SortCriterion sortCriterion = - input.getSortInput() != null - ? mapSortCriterion(input.getSortInput().getSortCriterion()) - : null; + List sortCriteria; + if (input.getSortInput() != null) { + if (input.getSortInput().getSortCriteria() != null) { + sortCriteria = + input.getSortInput().getSortCriteria().stream() + .map(SearchUtils::mapSortCriterion) + .collect(Collectors.toList()); + } else { + sortCriteria = + input.getSortInput().getSortCriterion() != null + ? 
Collections.singletonList( + mapSortCriterion(input.getSortInput().getSortCriterion())) + : Collections.emptyList(); + } + + } else { + sortCriteria = Collections.emptyList(); + } try { log.debug( @@ -100,7 +116,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) : baseFilter, start, count, - sortCriterion)); + sortCriteria)); } catch (Exception e) { log.error( "Failed to execute search for multiple entities: entity types {}, query {}, filters: {}, start: {}, count: {}", diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java index 5fb2f8f14b293c..7a48e305dbfe49 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java @@ -20,6 +20,7 @@ import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; import io.opentelemetry.extension.annotations.WithSpan; +import java.util.Collections; import java.util.concurrent.CompletableFuture; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -89,7 +90,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) input.getFilters(), input.getOrFilters(), context.getOperationContext().getAspectRetriever()), - null, + Collections.emptyList(), start, count)); } catch (Exception e) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/settings/docPropagation/DocPropagationSettingsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/settings/docPropagation/DocPropagationSettingsResolver.java index 84d3bcd7b376c0..0641d6aca63704 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/settings/docPropagation/DocPropagationSettingsResolver.java 
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/settings/docPropagation/DocPropagationSettingsResolver.java @@ -33,7 +33,9 @@ public CompletableFuture get(final DataFetchingEnvironme final GlobalSettingsInfo globalSettings = _settingsService.getGlobalSettings(context.getOperationContext()); final DocPropagationSettings defaultSettings = new DocPropagationSettings(); - defaultSettings.setDocColumnPropagation(true); + // TODO: Enable by default. Currently the automation trusts the settings aspect, which + // does not have this. + defaultSettings.setDocColumnPropagation(false); return globalSettings != null && globalSettings.hasDocPropagation() ? mapDocPropagationSettings(globalSettings.getDocPropagation()) : defaultSettings; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolver.java index 952e55ca117f2d..265f4d5f5d56e2 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolver.java @@ -74,7 +74,7 @@ public CompletableFuture get(final DataFetchingEnvironment envi Constants.DATAHUB_VIEW_ENTITY_NAME, query, buildFilters(context.getOperationContext().getAspectRetriever()), - DEFAULT_SORT_CRITERION, + Collections.singletonList(DEFAULT_SORT_CRITERION), start, count); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolver.java index 32eb0e46bb6160..abfdeb2d608693 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolver.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolver.java @@ -79,7 +79,7 @@ public CompletableFuture get(final DataFetchingEnvironment envi viewType, context.getActorUrn(), context.getOperationContext().getAspectRetriever()), - DEFAULT_SORT_CRITERION, + Collections.singletonList(DEFAULT_SORT_CRITERION), start, count); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java index 89d5aa8621bf08..a7b5f6de0c183d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java @@ -222,6 +222,7 @@ private void mapDatasetProperties( properties.setQualifiedName(gmsProperties.getQualifiedName()); dataset.setProperties(properties); dataset.setDescription(properties.getDescription()); + dataset.setName(properties.getName()); if (gmsProperties.getUri() != null) { dataset.setUri(gmsProperties.getUri().toString()); } @@ -248,6 +249,9 @@ private void mapEditableDatasetProperties(@Nonnull Dataset dataset, @Nonnull Dat new EditableDatasetProperties(dataMap); final DatasetEditableProperties editableProperties = new DatasetEditableProperties(); editableProperties.setDescription(editableDatasetProperties.getDescription()); + if (editableDatasetProperties.getName() != null) { + editableProperties.setName(editableDatasetProperties.getName()); + } dataset.setEditableProperties(editableProperties); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetUpdateInputMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetUpdateInputMapper.java index 122298bcab6547..104dc0e1043413 100644 --- 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetUpdateInputMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetUpdateInputMapper.java @@ -111,8 +111,13 @@ public Collection apply( if (datasetUpdateInput.getEditableProperties() != null) { final EditableDatasetProperties editableDatasetProperties = new EditableDatasetProperties(); - editableDatasetProperties.setDescription( - datasetUpdateInput.getEditableProperties().getDescription()); + if (datasetUpdateInput.getEditableProperties().getDescription() != null) { + editableDatasetProperties.setDescription( + datasetUpdateInput.getEditableProperties().getDescription()); + } + if (datasetUpdateInput.getEditableProperties().getName() != null) { + editableDatasetProperties.setName(datasetUpdateInput.getEditableProperties().getName()); + } editableDatasetProperties.setLastModified(auditStamp); editableDatasetProperties.setCreated(auditStamp); proposals.add( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeMapper.java index 26835f9e57dcd8..77457a814bd677 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeMapper.java @@ -15,40 +15,49 @@ public class EntityTypeMapper { static final Map ENTITY_TYPE_TO_NAME = ImmutableMap.builder() + .put(EntityType.DOMAIN, Constants.DOMAIN_ENTITY_NAME) .put(EntityType.DATASET, Constants.DATASET_ENTITY_NAME) - .put(EntityType.ROLE, Constants.ROLE_ENTITY_NAME) .put(EntityType.CORP_USER, Constants.CORP_USER_ENTITY_NAME) .put(EntityType.CORP_GROUP, Constants.CORP_GROUP_ENTITY_NAME) .put(EntityType.DATA_PLATFORM, Constants.DATA_PLATFORM_ENTITY_NAME) + 
.put(EntityType.ER_MODEL_RELATIONSHIP, Constants.ER_MODEL_RELATIONSHIP_ENTITY_NAME) .put(EntityType.DASHBOARD, Constants.DASHBOARD_ENTITY_NAME) + .put(EntityType.NOTEBOOK, Constants.NOTEBOOK_ENTITY_NAME) .put(EntityType.CHART, Constants.CHART_ENTITY_NAME) - .put(EntityType.TAG, Constants.TAG_ENTITY_NAME) .put(EntityType.DATA_FLOW, Constants.DATA_FLOW_ENTITY_NAME) .put(EntityType.DATA_JOB, Constants.DATA_JOB_ENTITY_NAME) - .put(EntityType.DATA_PROCESS_INSTANCE, Constants.DATA_PROCESS_INSTANCE_ENTITY_NAME) + .put(EntityType.TAG, Constants.TAG_ENTITY_NAME) .put(EntityType.GLOSSARY_TERM, Constants.GLOSSARY_TERM_ENTITY_NAME) .put(EntityType.GLOSSARY_NODE, Constants.GLOSSARY_NODE_ENTITY_NAME) + .put(EntityType.CONTAINER, Constants.CONTAINER_ENTITY_NAME) .put(EntityType.MLMODEL, Constants.ML_MODEL_ENTITY_NAME) .put(EntityType.MLMODEL_GROUP, Constants.ML_MODEL_GROUP_ENTITY_NAME) .put(EntityType.MLFEATURE_TABLE, Constants.ML_FEATURE_TABLE_ENTITY_NAME) .put(EntityType.MLFEATURE, Constants.ML_FEATURE_ENTITY_NAME) .put(EntityType.MLPRIMARY_KEY, Constants.ML_PRIMARY_KEY_ENTITY_NAME) - .put(EntityType.CONTAINER, Constants.CONTAINER_ENTITY_NAME) - .put(EntityType.DOMAIN, Constants.DOMAIN_ENTITY_NAME) - .put(EntityType.NOTEBOOK, Constants.NOTEBOOK_ENTITY_NAME) + .put(EntityType.INGESTION_SOURCE, Constants.INGESTION_SOURCE_ENTITY_NAME) + .put(EntityType.EXECUTION_REQUEST, Constants.EXECUTION_REQUEST_ENTITY_NAME) + .put(EntityType.ASSERTION, Constants.ASSERTION_ENTITY_NAME) + .put(EntityType.DATA_PROCESS_INSTANCE, Constants.DATA_PROCESS_INSTANCE_ENTITY_NAME) .put(EntityType.DATA_PLATFORM_INSTANCE, Constants.DATA_PLATFORM_INSTANCE_ENTITY_NAME) + .put(EntityType.ACCESS_TOKEN, Constants.ACCESS_TOKEN_ENTITY_NAME) .put(EntityType.TEST, Constants.TEST_ENTITY_NAME) - .put(EntityType.ER_MODEL_RELATIONSHIP, Constants.ER_MODEL_RELATIONSHIP_ENTITY_NAME) + .put(EntityType.DATAHUB_POLICY, Constants.POLICY_ENTITY_NAME) + .put(EntityType.DATAHUB_ROLE, Constants.DATAHUB_ROLE_ENTITY_NAME) + 
.put(EntityType.POST, Constants.POST_ENTITY_NAME) + .put(EntityType.SCHEMA_FIELD, Constants.SCHEMA_FIELD_ENTITY_NAME) .put(EntityType.DATAHUB_VIEW, Constants.DATAHUB_VIEW_ENTITY_NAME) + .put(EntityType.QUERY, Constants.QUERY_ENTITY_NAME) .put(EntityType.DATA_PRODUCT, Constants.DATA_PRODUCT_ENTITY_NAME) - .put(EntityType.SCHEMA_FIELD, Constants.SCHEMA_FIELD_ENTITY_NAME) + .put(EntityType.CUSTOM_OWNERSHIP_TYPE, Constants.OWNERSHIP_TYPE_ENTITY_NAME) + .put(EntityType.INCIDENT, Constants.INCIDENT_ENTITY_NAME) + .put(EntityType.ROLE, Constants.ROLE_ENTITY_NAME) .put(EntityType.STRUCTURED_PROPERTY, Constants.STRUCTURED_PROPERTY_ENTITY_NAME) - .put(EntityType.ASSERTION, Constants.ASSERTION_ENTITY_NAME) + .put(EntityType.FORM, Constants.FORM_ENTITY_NAME) + .put(EntityType.DATA_TYPE, Constants.DATA_TYPE_ENTITY_NAME) + .put(EntityType.ENTITY_TYPE, Constants.ENTITY_TYPE_ENTITY_NAME) .put(EntityType.RESTRICTED, Constants.RESTRICTED_ENTITY_NAME) .put(EntityType.BUSINESS_ATTRIBUTE, Constants.BUSINESS_ATTRIBUTE_ENTITY_NAME) - .put(EntityType.QUERY, Constants.QUERY_ENTITY_NAME) - .put(EntityType.POST, Constants.POST_ENTITY_NAME) - .put(EntityType.FORM, Constants.FORM_ENTITY_NAME) .build(); private static final Map ENTITY_NAME_TO_TYPE = diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java index 9e9bf86e5fe7fe..334faf753cb8b5 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java @@ -20,34 +20,63 @@ public class EntityTypeUrnMapper { static final Map ENTITY_NAME_TO_ENTITY_TYPE_URN = ImmutableMap.builder() + .put(Constants.DOMAIN_ENTITY_NAME, "urn:li:entityType:datahub.domain") .put(Constants.DATASET_ENTITY_NAME, 
"urn:li:entityType:datahub.dataset") - .put(Constants.ROLE_ENTITY_NAME, "urn:li:entityType:datahub.role") .put(Constants.CORP_USER_ENTITY_NAME, "urn:li:entityType:datahub.corpuser") .put(Constants.CORP_GROUP_ENTITY_NAME, "urn:li:entityType:datahub.corpGroup") .put(Constants.DATA_PLATFORM_ENTITY_NAME, "urn:li:entityType:datahub.dataPlatform") + .put( + Constants.ER_MODEL_RELATIONSHIP_ENTITY_NAME, + "urn:li:entityType:datahub.erModelRelationship") .put(Constants.DASHBOARD_ENTITY_NAME, "urn:li:entityType:datahub.dashboard") + .put(Constants.NOTEBOOK_ENTITY_NAME, "urn:li:entityType:datahub.notebook") .put(Constants.CHART_ENTITY_NAME, "urn:li:entityType:datahub.chart") - .put(Constants.TAG_ENTITY_NAME, "urn:li:entityType:datahub.tag") .put(Constants.DATA_FLOW_ENTITY_NAME, "urn:li:entityType:datahub.dataFlow") .put(Constants.DATA_JOB_ENTITY_NAME, "urn:li:entityType:datahub.dataJob") + .put(Constants.TAG_ENTITY_NAME, "urn:li:entityType:datahub.tag") .put(Constants.GLOSSARY_TERM_ENTITY_NAME, "urn:li:entityType:datahub.glossaryTerm") .put(Constants.GLOSSARY_NODE_ENTITY_NAME, "urn:li:entityType:datahub.glossaryNode") + .put(Constants.CONTAINER_ENTITY_NAME, "urn:li:entityType:datahub.container") .put(Constants.ML_MODEL_ENTITY_NAME, "urn:li:entityType:datahub.mlModel") .put(Constants.ML_MODEL_GROUP_ENTITY_NAME, "urn:li:entityType:datahub.mlModelGroup") .put(Constants.ML_FEATURE_TABLE_ENTITY_NAME, "urn:li:entityType:datahub.mlFeatureTable") .put(Constants.ML_FEATURE_ENTITY_NAME, "urn:li:entityType:datahub.mlFeature") .put(Constants.ML_PRIMARY_KEY_ENTITY_NAME, "urn:li:entityType:datahub.mlPrimaryKey") - .put(Constants.CONTAINER_ENTITY_NAME, "urn:li:entityType:datahub.container") - .put(Constants.DOMAIN_ENTITY_NAME, "urn:li:entityType:datahub.domain") - .put(Constants.NOTEBOOK_ENTITY_NAME, "urn:li:entityType:datahub.notebook") + .put( + Constants.INGESTION_SOURCE_ENTITY_NAME, + "urn:li:entityType:datahub.dataHubIngestionSource") + .put( + Constants.EXECUTION_REQUEST_ENTITY_NAME, 
+ "urn:li:entityType:datahub.dataHubExecutionRequest") + .put(Constants.ASSERTION_ENTITY_NAME, "urn:li:entityType:datahub.assertion") + .put( + Constants.DATA_PROCESS_INSTANCE_ENTITY_NAME, + "urn:li:entityType:datahub.dataProcessInstance") .put( Constants.DATA_PLATFORM_INSTANCE_ENTITY_NAME, "urn:li:entityType:datahub.dataPlatformInstance") + .put(Constants.ACCESS_TOKEN_ENTITY_NAME, "urn:li:entityType:datahub.dataHubAccessToken") .put(Constants.TEST_ENTITY_NAME, "urn:li:entityType:datahub.test") + .put(Constants.POLICY_ENTITY_NAME, "urn:li:entityType:datahub.dataHubPolicy") + .put(Constants.DATAHUB_ROLE_ENTITY_NAME, "urn:li:entityType:datahub.dataHubRole") + .put(Constants.POST_ENTITY_NAME, "urn:li:entityType:datahub.post") + .put(Constants.SCHEMA_FIELD_ENTITY_NAME, "urn:li:entityType:datahub.schemaField") .put(Constants.DATAHUB_VIEW_ENTITY_NAME, "urn:li:entityType:datahub.dataHubView") + .put(Constants.QUERY_ENTITY_NAME, "urn:li:entityType:datahub.query") .put(Constants.DATA_PRODUCT_ENTITY_NAME, "urn:li:entityType:datahub.dataProduct") - .put(Constants.ASSERTION_ENTITY_NAME, "urn:li:entityType:datahub.assertion") - .put(Constants.SCHEMA_FIELD_ENTITY_NAME, "urn:li:entityType:datahub.schemaField") + .put(Constants.OWNERSHIP_TYPE_ENTITY_NAME, "urn:li:entityType:datahub.ownershipType") + .put(Constants.INCIDENT_ENTITY_NAME, "urn:li:entityType:datahub.incident") + .put(Constants.ROLE_ENTITY_NAME, "urn:li:entityType:datahub.role") + .put( + Constants.STRUCTURED_PROPERTY_ENTITY_NAME, + "urn:li:entityType:datahub.structuredProperty") + .put(Constants.FORM_ENTITY_NAME, "urn:li:entityType:datahub.form") + .put(Constants.DATA_TYPE_ENTITY_NAME, "urn:li:entityType:datahub.dataType") + .put(Constants.ENTITY_TYPE_ENTITY_NAME, "urn:li:entityType:datahub.entityType") + .put(Constants.RESTRICTED_ENTITY_NAME, "urn:li:entityType:datahub.restricted") + .put( + Constants.BUSINESS_ATTRIBUTE_ENTITY_NAME, + "urn:li:entityType:datahub.businessAttribute") .build(); private static final Map 
ENTITY_TYPE_URN_TO_NAME = diff --git a/datahub-graphql-core/src/main/resources/app.graphql b/datahub-graphql-core/src/main/resources/app.graphql index 024a7a989f9db5..262d2384d84ada 100644 --- a/datahub-graphql-core/src/main/resources/app.graphql +++ b/datahub-graphql-core/src/main/resources/app.graphql @@ -508,6 +508,11 @@ type FeatureFlagsConfig { """ dataContractsEnabled: Boolean! + """ + Whether dataset names are editable + """ + editableDatasetNameEnabled: Boolean! + """ If turned on, all siblings will be separated with no way to get to a "combined" sibling view """ diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 941a6a28ceb2c7..609597beee51bd 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -3482,6 +3482,11 @@ type DatasetEditableProperties { Description of the Dataset """ description: String + + """ + Editable name of the Dataset + """ + name: String } """ @@ -4850,6 +4855,10 @@ input DatasetEditablePropertiesUpdate { Writable description aka documentation for a Dataset """ description: String! + """ + Editable name of the Dataset + """ + name: String } """ diff --git a/datahub-graphql-core/src/main/resources/search.graphql b/datahub-graphql-core/src/main/resources/search.graphql index c7b5e61e9831c8..09a7217073527b 100644 --- a/datahub-graphql-core/src/main/resources/search.graphql +++ b/datahub-graphql-core/src/main/resources/search.graphql @@ -1372,7 +1372,12 @@ input SearchSortInput { """ A criterion to sort search results on """ - sortCriterion: SortCriterion! + sortCriterion: SortCriterion @deprecated(reason: "Use sortCriteria instead") + + """ + A list of values to sort search results on + """ + sortCriteria: [SortCriterion!] 
} """ diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java index 0df5d162bab442..522e4be0ec5ec2 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java @@ -12,14 +12,19 @@ import com.datahub.plugins.auth.authorization.Authorizer; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.UrnUtils; +import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.r2.RemoteInvocationException; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.test.metadata.context.TestOperationContexts; import java.util.List; +import java.util.stream.Collectors; +import org.mockito.ArgumentCaptor; import org.mockito.Mockito; +import org.testng.Assert; public class TestUtils { @@ -120,53 +125,89 @@ public static QueryContext getMockDenyContext(String actorUrn, AuthorizationRequ } public static void verifyIngestProposal( - EntityService mockService, - int numberOfInvocations, - MetadataChangeProposal proposal) { + EntityService mockService, int numberOfInvocations, MetadataChangeProposal proposal) { verifyIngestProposal(mockService, numberOfInvocations, List.of(proposal)); } public static void verifyIngestProposal( - EntityService mockService, + EntityService mockService, int numberOfInvocations, List proposals) { - AspectsBatchImpl batch = - AspectsBatchImpl.builder() - .mcps( - proposals, - mock(AuditStamp.class), - TestOperationContexts.emptyRetrieverContext(null)) - .build(); + ArgumentCaptor batchCaptor = ArgumentCaptor.forClass(AspectsBatchImpl.class); + 
Mockito.verify(mockService, Mockito.times(numberOfInvocations)) - .ingestProposal(any(), Mockito.eq(batch), Mockito.eq(false)); + .ingestProposal(any(), batchCaptor.capture(), Mockito.eq(false)); + + // check has time + Assert.assertTrue( + batchCaptor.getValue().getItems().stream() + .allMatch(prop -> prop.getSystemMetadata().getLastObserved() > 0L)); + + // check without time + Assert.assertEquals( + batchCaptor.getValue().getItems().stream() + .map(m -> m.getSystemMetadata().setLastObserved(0)) + .collect(Collectors.toList()), + proposals.stream() + .map(m -> m.getSystemMetadata().setLastObserved(0)) + .collect(Collectors.toList())); } public static void verifySingleIngestProposal( - EntityService mockService, + EntityService mockService, int numberOfInvocations, - MetadataChangeProposal proposal) { + MetadataChangeProposal expectedProposal) { + ArgumentCaptor proposalCaptor = + ArgumentCaptor.forClass(MetadataChangeProposal.class); + Mockito.verify(mockService, Mockito.times(numberOfInvocations)) - .ingestProposal(any(), Mockito.eq(proposal), any(AuditStamp.class), Mockito.eq(false)); + .ingestProposal(any(), proposalCaptor.capture(), any(AuditStamp.class), Mockito.eq(false)); + + // check has time + Assert.assertTrue(proposalCaptor.getValue().getSystemMetadata().getLastObserved() > 0L); + + // check without time + proposalCaptor.getValue().getSystemMetadata().setLastObserved(0L); + expectedProposal.getSystemMetadata().setLastObserved(0L); + Assert.assertEquals(proposalCaptor.getValue(), expectedProposal); } - public static void verifyIngestProposal( - EntityService mockService, int numberOfInvocations) { + public static void verifyIngestProposal(EntityService mockService, int numberOfInvocations) { Mockito.verify(mockService, Mockito.times(numberOfInvocations)) .ingestProposal(any(), any(AspectsBatchImpl.class), Mockito.eq(false)); } public static void verifySingleIngestProposal( - EntityService mockService, int numberOfInvocations) { + EntityService 
mockService, int numberOfInvocations) { Mockito.verify(mockService, Mockito.times(numberOfInvocations)) .ingestProposal( any(), any(MetadataChangeProposal.class), any(AuditStamp.class), Mockito.eq(false)); } - public static void verifyNoIngestProposal(EntityService mockService) { + public static void verifyNoIngestProposal(EntityService mockService) { Mockito.verify(mockService, Mockito.times(0)) .ingestProposal(any(), any(AspectsBatchImpl.class), Mockito.anyBoolean()); } + public static void verifyIngestProposal( + EntityClient mockClient, int numberOfInvocations, MetadataChangeProposal expectedProposal) + throws RemoteInvocationException { + + ArgumentCaptor proposalCaptor = + ArgumentCaptor.forClass(MetadataChangeProposal.class); + + Mockito.verify(mockClient, Mockito.times(numberOfInvocations)) + .ingestProposal(any(), proposalCaptor.capture(), Mockito.eq(false)); + + // check has time + Assert.assertTrue(proposalCaptor.getValue().getSystemMetadata().getLastObserved() > 0L); + + // check without time + proposalCaptor.getValue().getSystemMetadata().setLastObserved(0L); + expectedProposal.getSystemMetadata().setLastObserved(0L); + Assert.assertEquals(proposalCaptor.getValue(), expectedProposal); + } + private TestUtils() {} } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/assertion/DeleteAssertionResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/assertion/DeleteAssertionResolverTest.java index 4dd09c1e5cfd5f..948088175e8e63 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/assertion/DeleteAssertionResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/assertion/DeleteAssertionResolverTest.java @@ -31,7 +31,7 @@ public class DeleteAssertionResolverTest { public void testGetSuccess() throws Exception { EntityClient mockClient = Mockito.mock(EntityClient.class); - EntityService mockService = 
getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_ASSERTION_URN)), eq(true))) .thenReturn(true); Mockito.when( @@ -77,7 +77,7 @@ public void testGetSuccess() throws Exception { public void testGetSuccessNoAssertionInfoFound() throws Exception { EntityClient mockClient = Mockito.mock(EntityClient.class); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_ASSERTION_URN)), eq(true))) .thenReturn(true); Mockito.when( @@ -117,7 +117,7 @@ public void testGetSuccessAssertionAlreadyRemoved() throws Exception { // Create resolver EntityClient mockClient = Mockito.mock(EntityClient.class); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_ASSERTION_URN)), eq(true))) .thenReturn(false); @@ -149,7 +149,7 @@ public void testGetSuccessAssertionAlreadyRemoved() throws Exception { public void testGetUnauthorized() throws Exception { // Create resolver EntityClient mockClient = Mockito.mock(EntityClient.class); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_ASSERTION_URN)), eq(true))) .thenReturn(true); Mockito.when( @@ -186,7 +186,7 @@ public void testGetEntityClientException() throws Exception { .when(mockClient) .deleteEntity(any(), Mockito.any()); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_ASSERTION_URN)), eq(true))) .thenReturn(true); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java 
b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java index 6c876226a45e60..020f74475ea607 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java @@ -12,11 +12,11 @@ import com.linkedin.datahub.graphql.generated.ListAccessTokenResult; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; -import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.search.SearchEntityArray; import com.linkedin.metadata.search.SearchResult; import graphql.schema.DataFetchingEnvironment; import java.util.Collections; +import java.util.List; import org.mockito.Mockito; import org.testng.annotations.Test; @@ -47,7 +47,7 @@ public void testGetSuccess() throws Exception { Mockito.eq(Constants.ACCESS_TOKEN_ENTITY_NAME), Mockito.eq(""), Mockito.eq(buildFilter(filters, Collections.emptyList(), null)), - Mockito.any(SortCriterion.class), + Mockito.any(List.class), Mockito.eq(input.getStart()), Mockito.eq(input.getCount()))) .thenReturn( diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/businessattribute/AddBusinessAttributeResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/businessattribute/AddBusinessAttributeResolverTest.java index 1a0e558e309d7b..280adcf896d5e9 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/businessattribute/AddBusinessAttributeResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/businessattribute/AddBusinessAttributeResolverTest.java @@ -29,7 +29,7 @@ public class AddBusinessAttributeResolverTest { "urn:li:businessAttribute:7d0c4283-de02-4043-aaf2-698b04274658"; private static final String RESOURCE_URN = 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD),field_bar)"; - private EntityService mockService; + private EntityService mockService; private QueryContext mockContext; private DataFetchingEnvironment mockEnv; diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/businessattribute/CreateBusinessAttributeResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/businessattribute/CreateBusinessAttributeResolverTest.java index 574b81bb86630f..2623a6b25811ad 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/businessattribute/CreateBusinessAttributeResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/businessattribute/CreateBusinessAttributeResolverTest.java @@ -56,7 +56,7 @@ public class CreateBusinessAttributeResolverTest { TEST_BUSINESS_ATTRIBUTE_DESCRIPTION, SchemaFieldDataType.BOOLEAN); private EntityClient mockClient; - private EntityService mockService; + private EntityService mockService; private QueryContext mockContext; private DataFetchingEnvironment mockEnv; private BusinessAttributeService businessAttributeService; diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/businessattribute/RemoveBusinessAttributeResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/businessattribute/RemoveBusinessAttributeResolverTest.java index 32a12d3ee8607e..3e7df667160624 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/businessattribute/RemoveBusinessAttributeResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/businessattribute/RemoveBusinessAttributeResolverTest.java @@ -31,7 +31,7 @@ public class RemoveBusinessAttributeResolverTest { "urn:li:businessAttribute:7d0c4283-de02-4043-aaf2-698b04274658"; private static final String 
RESOURCE_URN = "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD),field_bar)"; - private EntityService mockService; + private EntityService mockService; private QueryContext mockContext; private DataFetchingEnvironment mockEnv; diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolverTest.java index c63c9bccab68b5..48732727762eea 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolverTest.java @@ -62,7 +62,7 @@ public void testGetSuccess() throws Exception { new CriterionArray(ImmutableList.of(filterCriterion)))))), Mockito.eq(0), Mockito.eq(20), - Mockito.eq(null), + Mockito.eq(Collections.emptyList()), Mockito.eq(null))) .thenReturn( new SearchResult() diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/datacontract/UpsertDataContractResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/datacontract/UpsertDataContractResolverTest.java index 601fc56b251495..bf01b54c7ed726 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/datacontract/UpsertDataContractResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/datacontract/UpsertDataContractResolverTest.java @@ -2,6 +2,7 @@ import static com.linkedin.datahub.graphql.TestUtils.*; import static com.linkedin.datahub.graphql.resolvers.datacontract.EntityDataContractResolver.*; +import static com.linkedin.metadata.utils.SystemMetadataUtils.createDefaultSystemMetadata; import static org.mockito.ArgumentMatchers.any; import static org.testng.Assert.*; @@ -43,14 
+44,19 @@ import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.mxe.MetadataChangeProposal; -import com.linkedin.mxe.SystemMetadata; import com.linkedin.r2.RemoteInvocationException; import graphql.schema.DataFetchingEnvironment; import io.datahubproject.metadata.context.OperationContext; import java.util.Collections; +import java.util.List; import java.util.concurrent.CompletionException; +import java.util.stream.Collectors; +import org.mockito.ArgumentCaptor; +import org.mockito.Captor; import org.mockito.Mockito; +import org.mockito.MockitoAnnotations; import org.testng.Assert; +import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; public class UpsertDataContractResolverTest { @@ -83,9 +89,15 @@ public class UpsertDataContractResolverTest { private static final Urn TEST_ACTOR_URN = UrnUtils.getUrn("urn:li:corpuser:test"); + @Captor private ArgumentCaptor> proposalCaptor; + + @BeforeTest + public void init() { + MockitoAnnotations.openMocks(this); + } + @Test public void testGetSuccessCreate() throws Exception { - // Expected results final DataContractKey key = new DataContractKey(); key.setId("test-id"); @@ -127,7 +139,8 @@ public void testGetSuccessCreate() throws Exception { propertiesProposal.setEntityUrn(dataContractUrn); propertiesProposal.setEntityType(Constants.DATA_CONTRACT_ENTITY_NAME); propertiesProposal.setSystemMetadata( - new SystemMetadata().setProperties(new StringMap(ImmutableMap.of("appSource", "ui")))); + createDefaultSystemMetadata() + .setProperties(new StringMap(ImmutableMap.of("appSource", "ui")))); propertiesProposal.setAspectName(Constants.DATA_CONTRACT_PROPERTIES_ASPECT_NAME); propertiesProposal.setAspect(GenericRecordUtils.serializeAspect(props)); propertiesProposal.setChangeType(ChangeType.UPSERT); @@ -136,16 +149,29 @@ public void testGetSuccessCreate() throws Exception { statusProposal.setEntityUrn(dataContractUrn); 
statusProposal.setEntityType(Constants.DATA_CONTRACT_ENTITY_NAME); statusProposal.setSystemMetadata( - new SystemMetadata().setProperties(new StringMap(ImmutableMap.of("appSource", "ui")))); + createDefaultSystemMetadata() + .setProperties(new StringMap(ImmutableMap.of("appSource", "ui")))); statusProposal.setAspectName(Constants.DATA_CONTRACT_STATUS_ASPECT_NAME); statusProposal.setAspect(GenericRecordUtils.serializeAspect(status)); statusProposal.setChangeType(ChangeType.UPSERT); Mockito.verify(mockClient, Mockito.times(1)) .batchIngestProposals( - any(OperationContext.class), - Mockito.eq(ImmutableList.of(propertiesProposal, statusProposal)), - Mockito.eq(false)); + any(OperationContext.class), proposalCaptor.capture(), Mockito.eq(false)); + + // check has time + Assert.assertTrue( + proposalCaptor.getValue().stream() + .allMatch(prop -> prop.getSystemMetadata().getLastObserved() > 0L)); + + // check without time + Assert.assertEquals( + proposalCaptor.getValue().stream() + .map(m -> m.getSystemMetadata().setLastObserved(0)) + .collect(Collectors.toList()), + List.of(propertiesProposal, statusProposal).stream() + .map(m -> m.getSystemMetadata().setLastObserved(0)) + .collect(Collectors.toList())); Assert.assertEquals(result.getUrn(), TEST_CONTRACT_URN.toString()); } @@ -188,7 +214,8 @@ public void testGetSuccessUpdate() throws Exception { propertiesProposal.setEntityUrn(TEST_CONTRACT_URN); propertiesProposal.setEntityType(Constants.DATA_CONTRACT_ENTITY_NAME); propertiesProposal.setSystemMetadata( - new SystemMetadata().setProperties(new StringMap(ImmutableMap.of("appSource", "ui")))); + createDefaultSystemMetadata() + .setProperties(new StringMap(ImmutableMap.of("appSource", "ui")))); propertiesProposal.setAspectName(Constants.DATA_CONTRACT_PROPERTIES_ASPECT_NAME); propertiesProposal.setAspect(GenericRecordUtils.serializeAspect(props)); propertiesProposal.setChangeType(ChangeType.UPSERT); @@ -197,16 +224,29 @@ public void testGetSuccessUpdate() throws Exception { 
statusProposal.setEntityUrn(TEST_CONTRACT_URN); statusProposal.setEntityType(Constants.DATA_CONTRACT_ENTITY_NAME); statusProposal.setSystemMetadata( - new SystemMetadata().setProperties(new StringMap(ImmutableMap.of("appSource", "ui")))); + createDefaultSystemMetadata() + .setProperties(new StringMap(ImmutableMap.of("appSource", "ui")))); statusProposal.setAspectName(Constants.DATA_CONTRACT_STATUS_ASPECT_NAME); statusProposal.setAspect(GenericRecordUtils.serializeAspect(status)); statusProposal.setChangeType(ChangeType.UPSERT); Mockito.verify(mockClient, Mockito.times(1)) .batchIngestProposals( - any(OperationContext.class), - Mockito.eq(ImmutableList.of(propertiesProposal, statusProposal)), - Mockito.eq(false)); + any(OperationContext.class), proposalCaptor.capture(), Mockito.eq(false)); + + // check has time + Assert.assertTrue( + proposalCaptor.getValue().stream() + .allMatch(prop -> prop.getSystemMetadata().getLastObserved() > 0L)); + + // check without time + Assert.assertEquals( + proposalCaptor.getValue().stream() + .map(m -> m.getSystemMetadata().setLastObserved(0)) + .collect(Collectors.toList()), + List.of(propertiesProposal, statusProposal).stream() + .map(m -> m.getSystemMetadata().setLastObserved(0)) + .collect(Collectors.toList())); Assert.assertEquals(result.getUrn(), TEST_CONTRACT_URN.toString()); } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/delete/BatchUpdateSoftDeletedResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/delete/BatchUpdateSoftDeletedResolverTest.java index 02c9212e7f563d..48b3dc5f6db943 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/delete/BatchUpdateSoftDeletedResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/delete/BatchUpdateSoftDeletedResolverTest.java @@ -33,7 +33,7 @@ public class BatchUpdateSoftDeletedResolverTest { @Test public void 
testGetSuccessNoExistingStatus() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -84,7 +84,7 @@ public void testGetSuccessNoExistingStatus() throws Exception { public void testGetSuccessExistingStatus() throws Exception { final Status originalStatus = new Status().setRemoved(true); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -133,7 +133,7 @@ public void testGetSuccessExistingStatus() throws Exception { @Test public void testGetFailureResourceDoesNotExist() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -173,7 +173,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { @Test public void testGetUnauthorized() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); BatchUpdateSoftDeletedResolver resolver = new BatchUpdateSoftDeletedResolver(mockService); @@ -193,7 +193,7 @@ public void testGetUnauthorized() throws Exception { @Test public void testGetEntityClientException() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.doThrow(RuntimeException.class) .when(mockService) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/deprecation/BatchUpdateDeprecationResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/deprecation/BatchUpdateDeprecationResolverTest.java index b7324d210fc212..265a1a2e0af5ba 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/deprecation/BatchUpdateDeprecationResolverTest.java +++ 
b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/deprecation/BatchUpdateDeprecationResolverTest.java @@ -34,7 +34,7 @@ public class BatchUpdateDeprecationResolverTest { @Test public void testGetSuccessNoExistingDeprecation() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -99,7 +99,7 @@ public void testGetSuccessExistingDeprecation() throws Exception { .setNote("") .setActor(UrnUtils.getUrn("urn:li:corpuser:test")); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -158,7 +158,7 @@ public void testGetSuccessExistingDeprecation() throws Exception { @Test public void testGetFailureResourceDoesNotExist() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -202,7 +202,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { @Test public void testGetUnauthorized() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); BatchUpdateDeprecationResolver resolver = new BatchUpdateDeprecationResolver(mockService); @@ -226,7 +226,7 @@ public void testGetUnauthorized() throws Exception { @Test public void testGetEntityClientException() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.doThrow(RuntimeException.class) .when(mockService) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/deprecation/UpdateDeprecationResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/deprecation/UpdateDeprecationResolverTest.java index 09894ccf49f227..ab180724da46df 100644 --- 
a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/deprecation/UpdateDeprecationResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/deprecation/UpdateDeprecationResolverTest.java @@ -59,7 +59,7 @@ public void testGetSuccessNoExistingDeprecation() throws Exception { .setUrn(Urn.createFromString(TEST_ENTITY_URN)) .setAspects(new EnvelopedAspectMap(Collections.emptyMap())))); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true))) .thenReturn(true); @@ -83,7 +83,7 @@ public void testGetSuccessNoExistingDeprecation() throws Exception { MutationUtils.buildMetadataChangeProposalWithUrn( UrnUtils.getUrn(TEST_ENTITY_URN), DEPRECATION_ASPECT_NAME, newDeprecation); - Mockito.verify(mockClient, Mockito.times(1)).ingestProposal(any(), eq(proposal), eq(false)); + verifyIngestProposal(mockClient, 1, proposal); Mockito.verify(mockService, Mockito.times(1)) .exists(any(), eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true)); @@ -120,7 +120,7 @@ public void testGetSuccessExistingDeprecation() throws Exception { new EnvelopedAspect() .setValue(new Aspect(originalDeprecation.data()))))))); - EntityService mockService = Mockito.mock(EntityService.class); + EntityService mockService = Mockito.mock(EntityService.class); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true))) .thenReturn(true); @@ -144,7 +144,7 @@ public void testGetSuccessExistingDeprecation() throws Exception { MutationUtils.buildMetadataChangeProposalWithUrn( UrnUtils.getUrn(TEST_ENTITY_URN), DEPRECATION_ASPECT_NAME, newDeprecation); - Mockito.verify(mockClient, Mockito.times(1)).ingestProposal(any(), eq(proposal), eq(false)); + verifyIngestProposal(mockClient, 1, proposal); Mockito.verify(mockService, Mockito.times(1)) .exists(any(), 
eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true)); @@ -169,7 +169,7 @@ public void testGetFailureEntityDoesNotExist() throws Exception { .setUrn(Urn.createFromString(TEST_ENTITY_URN)) .setAspects(new EnvelopedAspectMap(Collections.emptyMap())))); - EntityService mockService = Mockito.mock(EntityService.class); + EntityService mockService = Mockito.mock(EntityService.class); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true))) .thenReturn(false); @@ -190,7 +190,7 @@ public void testGetFailureEntityDoesNotExist() throws Exception { public void testGetUnauthorized() throws Exception { // Create resolver EntityClient mockClient = Mockito.mock(EntityClient.class); - EntityService mockService = Mockito.mock(EntityService.class); + EntityService mockService = Mockito.mock(EntityService.class); UpdateDeprecationResolver resolver = new UpdateDeprecationResolver(mockClient, mockService); // Execute resolver @@ -206,7 +206,7 @@ public void testGetUnauthorized() throws Exception { @Test public void testGetEntityClientException() throws Exception { EntityClient mockClient = Mockito.mock(EntityClient.class); - EntityService mockService = Mockito.mock(EntityService.class); + EntityService mockService = Mockito.mock(EntityService.class); Mockito.doThrow(RemoteInvocationException.class) .when(mockClient) .ingestProposal(any(), Mockito.any(), anyBoolean()); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/BatchSetDomainResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/BatchSetDomainResolverTest.java index 9fec8b2fd9572c..1a9272c1335cf9 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/BatchSetDomainResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/BatchSetDomainResolverTest.java @@ -39,7 +39,7 @@ public class BatchSetDomainResolverTest { @Test 
public void testGetSuccessNoExistingDomains() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -105,7 +105,7 @@ public void testGetSuccessExistingDomains() throws Exception { new Domains() .setDomains(new UrnArray(ImmutableList.of(Urn.createFromString(TEST_DOMAIN_1_URN)))); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -176,7 +176,7 @@ public void testGetSuccessUnsetDomains() throws Exception { new Domains() .setDomains(new UrnArray(ImmutableList.of(Urn.createFromString(TEST_DOMAIN_1_URN)))); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -233,7 +233,7 @@ public void testGetSuccessUnsetDomains() throws Exception { @Test public void testGetFailureDomainDoesNotExist() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -268,7 +268,7 @@ public void testGetFailureDomainDoesNotExist() throws Exception { @Test public void testGetFailureResourceDoesNotExist() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -312,7 +312,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { @Test public void testGetUnauthorized() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService); @@ -334,7 +334,7 @@ public void testGetUnauthorized() throws Exception { @Test public void testGetEntityClientException() throws Exception { - EntityService mockService = getMockEntityService(); + 
EntityService mockService = getMockEntityService(); Mockito.doThrow(RuntimeException.class) .when(mockService) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/CreateDomainResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/CreateDomainResolverTest.java index 1d6c4519358b45..c0d74225a9cf1d 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/CreateDomainResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/CreateDomainResolverTest.java @@ -53,7 +53,7 @@ public class CreateDomainResolverTest { public void testGetSuccess() throws Exception { // Create resolver EntityClient mockClient = Mockito.mock(EntityClient.class); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); CreateDomainResolver resolver = new CreateDomainResolver(mockClient, mockService); Mockito.when(mockClient.exists(any(), Mockito.eq(TEST_DOMAIN_URN))).thenReturn(false); @@ -103,7 +103,7 @@ public void testGetSuccess() throws Exception { @Test public void testGetSuccessNoParentDomain() throws Exception { EntityClient mockClient = Mockito.mock(EntityClient.class); - EntityService mockService = Mockito.mock(EntityService.class); + EntityService mockService = Mockito.mock(EntityService.class); CreateDomainResolver resolver = new CreateDomainResolver(mockClient, mockService); Mockito.when(mockClient.exists(any(), Mockito.eq(TEST_DOMAIN_URN))).thenReturn(false); @@ -146,7 +146,7 @@ public void testGetSuccessNoParentDomain() throws Exception { @Test public void testGetInvalidParent() throws Exception { EntityClient mockClient = Mockito.mock(EntityClient.class); - EntityService mockService = Mockito.mock(EntityService.class); + EntityService mockService = Mockito.mock(EntityService.class); CreateDomainResolver resolver = new CreateDomainResolver(mockClient, mockService); 
Mockito.when(mockClient.exists(any(), Mockito.eq(TEST_DOMAIN_URN))).thenReturn(false); @@ -164,7 +164,7 @@ public void testGetInvalidParent() throws Exception { @Test public void testGetNameConflict() throws Exception { EntityClient mockClient = Mockito.mock(EntityClient.class); - EntityService mockService = Mockito.mock(EntityService.class); + EntityService mockService = Mockito.mock(EntityService.class); CreateDomainResolver resolver = new CreateDomainResolver(mockClient, mockService); Mockito.when(mockClient.exists(any(), Mockito.eq(TEST_DOMAIN_URN))).thenReturn(false); @@ -218,7 +218,7 @@ public void testGetNameConflict() throws Exception { public void testGetUnauthorized() throws Exception { // Create resolver EntityClient mockClient = Mockito.mock(EntityClient.class); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); CreateDomainResolver resolver = new CreateDomainResolver(mockClient, mockService); // Execute resolver @@ -235,7 +235,7 @@ public void testGetUnauthorized() throws Exception { public void testGetEntityClientException() throws Exception { // Create resolver EntityClient mockClient = Mockito.mock(EntityClient.class); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.doThrow(RemoteInvocationException.class) .when(mockClient) .ingestProposal(any(), Mockito.any(), Mockito.eq(false)); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolverTest.java index f970f9e2ea431d..ad5d7f1ef6b06f 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolverTest.java @@ -68,7 +68,7 @@ public void 
testGetSuccess() throws Exception { new CriterionArray(ImmutableList.of(filterCriterion)))))), Mockito.eq(0), Mockito.eq(20), - Mockito.eq(null), + Mockito.eq(Collections.emptyList()), Mockito.eq(null))) .thenReturn( new SearchResult() diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/ListDomainsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/ListDomainsResolverTest.java index 53a16ed5f6cc8a..c3b1a8c564855a 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/ListDomainsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/ListDomainsResolverTest.java @@ -20,6 +20,7 @@ import com.linkedin.metadata.search.SearchResult; import com.linkedin.r2.RemoteInvocationException; import graphql.schema.DataFetchingEnvironment; +import java.util.Collections; import java.util.concurrent.CompletionException; import org.mockito.Mockito; import org.testng.annotations.Test; @@ -47,9 +48,10 @@ public void testGetSuccess() throws Exception { Mockito.eq(""), Mockito.eq(DomainUtils.buildParentDomainFilter(TEST_PARENT_DOMAIN_URN)), Mockito.eq( - new SortCriterion() - .setField(DOMAIN_CREATED_TIME_INDEX_FIELD_NAME) - .setOrder(SortOrder.DESCENDING)), + Collections.singletonList( + new SortCriterion() + .setField(DOMAIN_CREATED_TIME_INDEX_FIELD_NAME) + .setOrder(SortOrder.DESCENDING))), Mockito.eq(0), Mockito.eq(20))) .thenReturn( @@ -90,9 +92,10 @@ public void testGetSuccessNoParentDomain() throws Exception { Mockito.eq(""), Mockito.eq(DomainUtils.buildParentDomainFilter(null)), Mockito.eq( - new SortCriterion() - .setField(DOMAIN_CREATED_TIME_INDEX_FIELD_NAME) - .setOrder(SortOrder.DESCENDING)), + Collections.singletonList( + new SortCriterion() + .setField(DOMAIN_CREATED_TIME_INDEX_FIELD_NAME) + .setOrder(SortOrder.DESCENDING))), Mockito.eq(0), Mockito.eq(20))) .thenReturn( diff --git 
a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/MoveDomainResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/MoveDomainResolverTest.java index ae1dffbd5d0db8..07fad314747db8 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/MoveDomainResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/MoveDomainResolverTest.java @@ -40,7 +40,7 @@ public class MoveDomainResolverTest { private static final CorpuserUrn TEST_ACTOR_URN = new CorpuserUrn("test"); private MetadataChangeProposal setupTests( - DataFetchingEnvironment mockEnv, EntityService mockService, EntityClient mockClient) + DataFetchingEnvironment mockEnv, EntityService mockService, EntityClient mockClient) throws Exception { QueryContext mockContext = getMockAllowContext(); Mockito.when(mockContext.getAuthentication()).thenReturn(Mockito.mock(Authentication.class)); @@ -77,7 +77,7 @@ private MetadataChangeProposal setupTests( @Test public void testGetSuccess() throws Exception { - EntityService mockService = Mockito.mock(EntityService.class); + EntityService mockService = Mockito.mock(EntityService.class); EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.exists( @@ -100,7 +100,7 @@ public void testGetSuccess() throws Exception { @Test public void testGetFailureEntityDoesNotExist() throws Exception { - EntityService mockService = Mockito.mock(EntityService.class); + EntityService mockService = Mockito.mock(EntityService.class); EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(PARENT_DOMAIN_URN)), eq(true))) .thenReturn(true); @@ -127,7 +127,7 @@ public void testGetFailureEntityDoesNotExist() throws Exception { @Test public void testGetFailureParentDoesNotExist() throws Exception { - EntityService mockService = 
Mockito.mock(EntityService.class); + EntityService mockService = Mockito.mock(EntityService.class); EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(PARENT_DOMAIN_URN)), eq(true))) .thenReturn(false); @@ -143,7 +143,7 @@ public void testGetFailureParentDoesNotExist() throws Exception { @Test public void testGetFailureParentIsNotDomain() throws Exception { - EntityService mockService = Mockito.mock(EntityService.class); + EntityService mockService = Mockito.mock(EntityService.class); EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(PARENT_DOMAIN_URN)), eq(true))) .thenReturn(true); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/SetDomainResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/SetDomainResolverTest.java index 031ac1da8480bf..5437f1c860fde6 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/SetDomainResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/SetDomainResolverTest.java @@ -59,7 +59,7 @@ public void testGetSuccessNoExistingDomains() throws Exception { .setUrn(Urn.createFromString(TEST_ENTITY_URN)) .setAspects(new EnvelopedAspectMap(Collections.emptyMap())))); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true))) .thenReturn(true); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_NEW_DOMAIN_URN)), eq(true))) @@ -82,8 +82,7 @@ public void testGetSuccessNoExistingDomains() throws Exception { MutationUtils.buildMetadataChangeProposalWithUrn( UrnUtils.getUrn(TEST_ENTITY_URN), DOMAINS_ASPECT_NAME, newDomains); - Mockito.verify(mockClient, Mockito.times(1)) - 
.ingestProposal(any(), Mockito.eq(proposal), Mockito.eq(false)); + verifyIngestProposal(mockClient, 1, proposal); Mockito.verify(mockService, Mockito.times(1)) .exists(any(), Mockito.eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true)); @@ -122,7 +121,7 @@ public void testGetSuccessExistingDomains() throws Exception { new EnvelopedAspect() .setValue(new Aspect(originalDomains.data()))))))); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true))) .thenReturn(true); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_NEW_DOMAIN_URN)), eq(true))) @@ -145,8 +144,7 @@ public void testGetSuccessExistingDomains() throws Exception { MutationUtils.buildMetadataChangeProposalWithUrn( UrnUtils.getUrn(TEST_ENTITY_URN), DOMAINS_ASPECT_NAME, newDomains); - Mockito.verify(mockClient, Mockito.times(1)) - .ingestProposal(any(), Mockito.eq(proposal), Mockito.eq(false)); + verifyIngestProposal(mockClient, 1, proposal); Mockito.verify(mockService, Mockito.times(1)) .exists(any(), Mockito.eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true)); @@ -175,7 +173,7 @@ public void testGetFailureDomainDoesNotExist() throws Exception { .setUrn(Urn.createFromString(TEST_ENTITY_URN)) .setAspects(new EnvelopedAspectMap(Collections.emptyMap())))); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true))) .thenReturn(true); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_NEW_DOMAIN_URN)), eq(true))) @@ -214,7 +212,7 @@ public void testGetFailureEntityDoesNotExist() throws Exception { .setUrn(Urn.createFromString(TEST_ENTITY_URN)) .setAspects(new EnvelopedAspectMap(Collections.emptyMap())))); - EntityService mockService = getMockEntityService(); + EntityService mockService = 
getMockEntityService(); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true))) .thenReturn(false); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_NEW_DOMAIN_URN)), eq(true))) @@ -237,7 +235,7 @@ public void testGetFailureEntityDoesNotExist() throws Exception { public void testGetUnauthorized() throws Exception { // Create resolver EntityClient mockClient = Mockito.mock(EntityClient.class); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); SetDomainResolver resolver = new SetDomainResolver(mockClient, mockService); // Execute resolver diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/UnsetDomainResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/UnsetDomainResolverTest.java index b9910e6ca3c85e..1c61963703a2ab 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/UnsetDomainResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/UnsetDomainResolverTest.java @@ -58,7 +58,7 @@ public void testGetSuccessNoExistingDomains() throws Exception { .setUrn(Urn.createFromString(TEST_ENTITY_URN)) .setAspects(new EnvelopedAspectMap(Collections.emptyMap())))); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true))) .thenReturn(true); @@ -76,8 +76,7 @@ public void testGetSuccessNoExistingDomains() throws Exception { MutationUtils.buildMetadataChangeProposalWithUrn( UrnUtils.getUrn(TEST_ENTITY_URN), DOMAINS_ASPECT_NAME, newDomains); - Mockito.verify(mockClient, Mockito.times(1)) - .ingestProposal(any(), Mockito.eq(proposal), Mockito.eq(false)); + verifyIngestProposal(mockClient, 1, proposal); Mockito.verify(mockService, Mockito.times(1)) 
.exists(any(), Mockito.eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true)); @@ -113,7 +112,7 @@ public void testGetSuccessExistingDomains() throws Exception { new EnvelopedAspect() .setValue(new Aspect(originalDomains.data()))))))); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true))) .thenReturn(true); @@ -131,8 +130,7 @@ public void testGetSuccessExistingDomains() throws Exception { MutationUtils.buildMetadataChangeProposalWithUrn( UrnUtils.getUrn(TEST_ENTITY_URN), DOMAINS_ASPECT_NAME, newDomains); - Mockito.verify(mockClient, Mockito.times(1)) - .ingestProposal(any(), Mockito.eq(proposal), Mockito.eq(false)); + verifyIngestProposal(mockClient, 1, proposal); Mockito.verify(mockService, Mockito.times(1)) .exists(any(), Mockito.eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true)); @@ -158,7 +156,7 @@ public void testGetFailureEntityDoesNotExist() throws Exception { .setUrn(Urn.createFromString(TEST_ENTITY_URN)) .setAspects(new EnvelopedAspectMap(Collections.emptyMap())))); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true))) .thenReturn(false); @@ -178,7 +176,7 @@ public void testGetFailureEntityDoesNotExist() throws Exception { public void testGetUnauthorized() throws Exception { // Create resolver EntityClient mockClient = Mockito.mock(EntityClient.class); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); UnsetDomainResolver resolver = new UnsetDomainResolver(mockClient, mockService); // Execute resolver diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/embed/UpdateEmbedResolverTest.java 
b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/embed/UpdateEmbedResolverTest.java index 2bb9dff29a5643..c45e620a46a73d 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/embed/UpdateEmbedResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/embed/UpdateEmbedResolverTest.java @@ -39,7 +39,7 @@ public class UpdateEmbedResolverTest { @Test public void testGetSuccessNoExistingEmbed() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -68,7 +68,6 @@ public void testGetSuccessNoExistingEmbed() throws Exception { Urn.createFromString(TEST_ENTITY_URN), EMBED_ASPECT_NAME, newEmbed); verifySingleIngestProposal(mockService, 1, proposal); - ; Mockito.verify(mockService, Mockito.times(1)) .exists(any(), Mockito.eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true)); @@ -79,7 +78,7 @@ public void testGetSuccessExistingEmbed() throws Exception { Embed originalEmbed = new Embed().setRenderUrl("https://otherurl.com"); // Create resolver - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -132,7 +131,7 @@ public void testGetFailureEntityDoesNotExist() throws Exception { .setUrn(Urn.createFromString(TEST_ENTITY_URN)) .setAspects(new EnvelopedAspectMap(Collections.emptyMap())))); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true))) .thenReturn(false); @@ -154,7 +153,7 @@ public void testGetFailureEntityDoesNotExist() throws Exception { @Test public void testGetUnauthorized() throws Exception { // Create resolver - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); 
UpdateEmbedResolver resolver = new UpdateEmbedResolver(mockService); // Execute resolver @@ -171,7 +170,7 @@ public void testGetUnauthorized() throws Exception { @Test public void testGetEntityClientException() throws Exception { EntityClient mockClient = Mockito.mock(EntityClient.class); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.doThrow(RemoteInvocationException.class) .when(mockClient) .ingestProposal(any(), Mockito.any()); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/AddRelatedTermsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/AddRelatedTermsResolverTest.java index ffa9e4a728dda4..a8920fa9e5b3c2 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/AddRelatedTermsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/AddRelatedTermsResolverTest.java @@ -28,7 +28,7 @@ public class AddRelatedTermsResolverTest { private static final String DATASET_URN = "urn:li:dataset:(test,test,test)"; private EntityService setUpService() { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( any(), @@ -41,7 +41,7 @@ private EntityService setUpService() { @Test public void testGetSuccessIsRelatedNonExistent() throws Exception { - EntityService mockService = setUpService(); + EntityService mockService = setUpService(); EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true))) @@ -75,7 +75,7 @@ public void testGetSuccessIsRelatedNonExistent() throws Exception { @Test public void testGetSuccessHasRelatedNonExistent() throws Exception { - EntityService mockService = setUpService(); + EntityService mockService = setUpService(); 
EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true))) @@ -109,7 +109,7 @@ public void testGetSuccessHasRelatedNonExistent() throws Exception { @Test public void testGetFailAddSelfAsRelatedTerm() throws Exception { - EntityService mockService = setUpService(); + EntityService mockService = setUpService(); EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true))) @@ -131,7 +131,7 @@ public void testGetFailAddSelfAsRelatedTerm() throws Exception { @Test public void testGetFailAddNonTermAsRelatedTerm() throws Exception { - EntityService mockService = setUpService(); + EntityService mockService = setUpService(); EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true))) @@ -153,7 +153,7 @@ public void testGetFailAddNonTermAsRelatedTerm() throws Exception { @Test public void testGetFailAddNonExistentTermAsRelatedTerm() throws Exception { - EntityService mockService = setUpService(); + EntityService mockService = setUpService(); EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true))) @@ -177,7 +177,7 @@ public void testGetFailAddNonExistentTermAsRelatedTerm() throws Exception { @Test public void testGetFailAddToNonExistentUrn() throws Exception { - EntityService mockService = setUpService(); + EntityService mockService = setUpService(); EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true))) @@ -201,7 +201,7 @@ public void testGetFailAddToNonExistentUrn() throws Exception { @Test public void testGetFailAddToNonTerm() throws Exception { - EntityService mockService = setUpService(); + EntityService 
mockService = setUpService(); EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(DATASET_URN)), eq(true))) @@ -225,7 +225,7 @@ public void testGetFailAddToNonTerm() throws Exception { @Test public void testFailNoPermissions() throws Exception { - EntityService mockService = setUpService(); + EntityService mockService = setUpService(); EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true))) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/CreateGlossaryNodeResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/CreateGlossaryNodeResolverTest.java index 1a7e74c36733ca..0f2fa7f88cd9b6 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/CreateGlossaryNodeResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/CreateGlossaryNodeResolverTest.java @@ -2,8 +2,8 @@ import static com.linkedin.datahub.graphql.TestUtils.getMockAllowContext; import static com.linkedin.datahub.graphql.TestUtils.getMockEntityService; +import static com.linkedin.datahub.graphql.TestUtils.verifyIngestProposal; import static com.linkedin.metadata.Constants.*; -import static org.mockito.ArgumentMatchers.any; import com.datahub.authentication.Authentication; import com.linkedin.common.urn.GlossaryNodeUrn; @@ -63,7 +63,7 @@ private MetadataChangeProposal setupTest( @Test public void testGetSuccess() throws Exception { EntityClient mockClient = Mockito.mock(EntityClient.class); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); final MetadataChangeProposal proposal = setupTest(mockEnv, TEST_INPUT, "test-description", 
parentNodeUrn); @@ -71,14 +71,13 @@ public void testGetSuccess() throws Exception { CreateGlossaryNodeResolver resolver = new CreateGlossaryNodeResolver(mockClient, mockService); resolver.get(mockEnv).get(); - Mockito.verify(mockClient, Mockito.times(1)) - .ingestProposal(any(), Mockito.eq(proposal), Mockito.eq(false)); + verifyIngestProposal(mockClient, 1, proposal); } @Test public void testGetSuccessNoDescription() throws Exception { EntityClient mockClient = Mockito.mock(EntityClient.class); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); final MetadataChangeProposal proposal = setupTest(mockEnv, TEST_INPUT_NO_DESCRIPTION, "", parentNodeUrn); @@ -86,14 +85,13 @@ public void testGetSuccessNoDescription() throws Exception { CreateGlossaryNodeResolver resolver = new CreateGlossaryNodeResolver(mockClient, mockService); resolver.get(mockEnv).get(); - Mockito.verify(mockClient, Mockito.times(1)) - .ingestProposal(any(), Mockito.eq(proposal), Mockito.eq(false)); + verifyIngestProposal(mockClient, 1, proposal); } @Test public void testGetSuccessNoParentNode() throws Exception { EntityClient mockClient = Mockito.mock(EntityClient.class); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); final MetadataChangeProposal proposal = setupTest(mockEnv, TEST_INPUT_NO_PARENT_NODE, "test-description", null); @@ -101,7 +99,6 @@ public void testGetSuccessNoParentNode() throws Exception { CreateGlossaryNodeResolver resolver = new CreateGlossaryNodeResolver(mockClient, mockService); resolver.get(mockEnv).get(); - Mockito.verify(mockClient, Mockito.times(1)) - .ingestProposal(any(), Mockito.eq(proposal), Mockito.eq(false)); + verifyIngestProposal(mockClient, 1, proposal); } } diff --git 
a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/CreateGlossaryTermResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/CreateGlossaryTermResolverTest.java index 728a840d97e94e..8a51d8ea100092 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/CreateGlossaryTermResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/CreateGlossaryTermResolverTest.java @@ -2,6 +2,7 @@ import static com.linkedin.datahub.graphql.TestUtils.getMockAllowContext; import static com.linkedin.datahub.graphql.TestUtils.getMockEntityService; +import static com.linkedin.datahub.graphql.TestUtils.verifyIngestProposal; import static com.linkedin.metadata.Constants.*; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyBoolean; @@ -81,7 +82,7 @@ private MetadataChangeProposal setupTest( @Test public void testGetSuccess() throws Exception { EntityClient mockClient = initMockClient(); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); final MetadataChangeProposal proposal = setupTest(mockEnv, TEST_INPUT, "test-description", parentNodeUrn); @@ -89,14 +90,13 @@ public void testGetSuccess() throws Exception { CreateGlossaryTermResolver resolver = new CreateGlossaryTermResolver(mockClient, mockService); resolver.get(mockEnv).get(); - Mockito.verify(mockClient, Mockito.times(1)) - .ingestProposal(any(), Mockito.eq(proposal), Mockito.eq(false)); + verifyIngestProposal(mockClient, 1, proposal); } @Test public void testGetSuccessNoDescription() throws Exception { EntityClient mockClient = initMockClient(); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); DataFetchingEnvironment mockEnv = 
Mockito.mock(DataFetchingEnvironment.class); final MetadataChangeProposal proposal = setupTest(mockEnv, TEST_INPUT_NO_DESCRIPTION, "", parentNodeUrn); @@ -104,14 +104,13 @@ public void testGetSuccessNoDescription() throws Exception { CreateGlossaryTermResolver resolver = new CreateGlossaryTermResolver(mockClient, mockService); resolver.get(mockEnv).get(); - Mockito.verify(mockClient, Mockito.times(1)) - .ingestProposal(any(), Mockito.eq(proposal), Mockito.eq(false)); + verifyIngestProposal(mockClient, 1, proposal); } @Test public void testGetSuccessNoParentNode() throws Exception { EntityClient mockClient = initMockClient(); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); final MetadataChangeProposal proposal = setupTest(mockEnv, TEST_INPUT_NO_PARENT_NODE, "test-description", null); @@ -119,8 +118,7 @@ public void testGetSuccessNoParentNode() throws Exception { CreateGlossaryTermResolver resolver = new CreateGlossaryTermResolver(mockClient, mockService); resolver.get(mockEnv).get(); - Mockito.verify(mockClient, Mockito.times(1)) - .ingestProposal(any(), Mockito.eq(proposal), Mockito.eq(false)); + verifyIngestProposal(mockClient, 1, proposal); } @Test @@ -157,7 +155,7 @@ public void testGetFailureExistingTermSameName() throws Exception { Mockito.eq(Collections.singleton(GLOSSARY_TERM_INFO_ASPECT_NAME)))) .thenReturn(result); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); CreateGlossaryEntityInput input = diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/DeleteGlossaryEntityResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/DeleteGlossaryEntityResolverTest.java index 18c4b07ffeff04..9adc5d5e516e52 
100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/DeleteGlossaryEntityResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/DeleteGlossaryEntityResolverTest.java @@ -25,7 +25,7 @@ public class DeleteGlossaryEntityResolverTest { @Test public void testGetSuccess() throws Exception { EntityClient mockClient = Mockito.mock(EntityClient.class); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_TERM_URN)), eq(true))) .thenReturn(true); @@ -50,7 +50,7 @@ public void testGetEntityClientException() throws Exception { .when(mockClient) .deleteEntity(any(), Mockito.any()); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_TERM_URN)), eq(true))) .thenReturn(true); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/RemoveRelatedTermsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/RemoveRelatedTermsResolverTest.java index 5de7966dc15e99..85019a475865e1 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/RemoveRelatedTermsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/RemoveRelatedTermsResolverTest.java @@ -36,7 +36,7 @@ public void testGetSuccessIsA() throws Exception { GlossaryTermUrn term2Urn = GlossaryTermUrn.createFromString(TEST_TERM_2_URN); final GlossaryRelatedTerms relatedTerms = new GlossaryRelatedTerms(); relatedTerms.setIsRelatedTerms(new GlossaryTermUrnArray(Arrays.asList(term1Urn, term2Urn))); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( 
mockService.getAspect( any(), @@ -71,7 +71,7 @@ public void testGetSuccessHasA() throws Exception { GlossaryTermUrn term2Urn = GlossaryTermUrn.createFromString(TEST_TERM_2_URN); final GlossaryRelatedTerms relatedTerms = new GlossaryRelatedTerms(); relatedTerms.setHasRelatedTerms(new GlossaryTermUrnArray(Arrays.asList(term1Urn, term2Urn))); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( any(), @@ -102,7 +102,7 @@ public void testGetSuccessHasA() throws Exception { @Test public void testFailAspectDoesNotExist() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( any(), @@ -135,7 +135,7 @@ public void testFailNoPermissions() throws Exception { GlossaryTermUrn term2Urn = GlossaryTermUrn.createFromString(TEST_TERM_2_URN); final GlossaryRelatedTerms relatedTerms = new GlossaryRelatedTerms(); relatedTerms.setIsRelatedTerms(new GlossaryTermUrnArray(Arrays.asList(term1Urn, term2Urn))); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( any(), diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/UpdateNameResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/UpdateNameResolverTest.java index e6f4aae56127ce..b4a2655755a028 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/UpdateNameResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/UpdateNameResolverTest.java @@ -42,7 +42,7 @@ public class UpdateNameResolverTest { private static final CorpuserUrn TEST_ACTOR_URN = new CorpuserUrn("test"); private MetadataChangeProposal setupTests( - DataFetchingEnvironment mockEnv, EntityService 
mockService) throws Exception { + DataFetchingEnvironment mockEnv, EntityService mockService) throws Exception { QueryContext mockContext = getMockAllowContext(); Mockito.when(mockContext.getAuthentication()).thenReturn(Mockito.mock(Authentication.class)); Mockito.when(mockContext.getActorUrn()).thenReturn(TEST_ACTOR_URN.toString()); @@ -65,7 +65,7 @@ private MetadataChangeProposal setupTests( @Test public void testGetSuccess() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.exists( @@ -83,7 +83,7 @@ public void testGetSuccess() throws Exception { @Test public void testGetSuccessForNode() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(NODE_URN)), eq(true))) .thenReturn(true); @@ -117,7 +117,7 @@ public void testGetSuccessForNode() throws Exception { @Test public void testGetSuccessForDomain() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(DOMAIN_URN)), eq(true))) .thenReturn(true); @@ -163,7 +163,7 @@ public void testGetSuccessForDomain() throws Exception { @Test public void testGetFailureEntityDoesNotExist() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.exists( diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/UpdateParentNodeResolverTest.java 
b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/UpdateParentNodeResolverTest.java index 39f9066bcddaa0..25a900d4d90696 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/UpdateParentNodeResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/UpdateParentNodeResolverTest.java @@ -43,7 +43,7 @@ public class UpdateParentNodeResolverTest { private static final CorpuserUrn TEST_ACTOR_URN = new CorpuserUrn("test"); private MetadataChangeProposal setupTests( - DataFetchingEnvironment mockEnv, EntityService mockService) throws Exception { + DataFetchingEnvironment mockEnv, EntityService mockService) throws Exception { QueryContext mockContext = getMockAllowContext(); Mockito.when(mockContext.getAuthentication()).thenReturn(Mockito.mock(Authentication.class)); Mockito.when(mockContext.getActorUrn()).thenReturn(TEST_ACTOR_URN.toString()); @@ -67,7 +67,7 @@ private MetadataChangeProposal setupTests( @Test public void testGetSuccess() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TERM_URN)), eq(true))) .thenReturn(true); @@ -89,7 +89,7 @@ public void testGetSuccess() throws Exception { @Test public void testGetSuccessForNode() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(NODE_URN)), eq(true))) .thenReturn(true); @@ -129,7 +129,7 @@ public void testGetSuccessForNode() throws Exception { @Test public void testGetFailureEntityDoesNotExist() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = 
getMockEntityService(); EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TERM_URN)), eq(true))) .thenReturn(false); @@ -149,7 +149,7 @@ public void testGetFailureEntityDoesNotExist() throws Exception { @Test public void testGetFailureNodeDoesNotExist() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TERM_URN)), eq(true))) .thenReturn(true); @@ -169,7 +169,7 @@ public void testGetFailureNodeDoesNotExist() throws Exception { @Test public void testGetFailureParentIsNotNode() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TERM_URN)), eq(true))) .thenReturn(true); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/incident/EntityIncidentsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/incident/EntityIncidentsResolverTest.java index 4be7eeba1d0180..4750143b8add8b 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/incident/EntityIncidentsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/incident/EntityIncidentsResolverTest.java @@ -37,6 +37,7 @@ import com.linkedin.metadata.search.utils.QueryUtils; import graphql.schema.DataFetchingEnvironment; import io.datahubproject.metadata.context.OperationContext; +import java.util.Collections; import java.util.HashMap; import java.util.Map; import org.mockito.Mockito; @@ -92,7 +93,7 @@ public void testGetSuccess() throws Exception { Mockito.any(), Mockito.eq(Constants.INCIDENT_ENTITY_NAME), 
Mockito.eq(expectedFilter), - Mockito.eq(expectedSort), + Mockito.eq(Collections.singletonList(expectedSort)), Mockito.eq(0), Mockito.eq(10))) .thenReturn( diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/IngestionSourceExecutionRequestsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/IngestionSourceExecutionRequestsResolverTest.java index c96dfe89adc5e1..fe4fe00454a261 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/IngestionSourceExecutionRequestsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/IngestionSourceExecutionRequestsResolverTest.java @@ -22,7 +22,6 @@ import com.linkedin.execution.ExecutionRequestResult; import com.linkedin.metadata.Constants; import com.linkedin.metadata.query.filter.Filter; -import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.search.SearchEntity; import com.linkedin.metadata.search.SearchEntityArray; import com.linkedin.metadata.search.SearchResult; @@ -30,6 +29,7 @@ import graphql.schema.DataFetchingEnvironment; import io.datahubproject.metadata.context.OperationContext; import java.util.HashSet; +import java.util.List; import org.mockito.Mockito; import org.testng.annotations.Test; @@ -46,7 +46,7 @@ public void testGetSuccess() throws Exception { any(), Mockito.eq(Constants.EXECUTION_REQUEST_ENTITY_NAME), Mockito.any(Filter.class), - Mockito.any(SortCriterion.class), + Mockito.any(List.class), Mockito.eq(0), Mockito.eq(10))) .thenReturn( diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/ListSecretsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/ListSecretsResolverTest.java index 82b8d895384caa..96a12dc3be5a76 100644 --- 
a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/ListSecretsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/ListSecretsResolverTest.java @@ -15,7 +15,6 @@ import com.linkedin.entity.EnvelopedAspectMap; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; -import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.search.SearchEntity; import com.linkedin.metadata.search.SearchEntityArray; import com.linkedin.metadata.search.SearchResult; @@ -24,6 +23,7 @@ import graphql.schema.DataFetchingEnvironment; import io.datahubproject.metadata.context.OperationContext; import java.util.HashSet; +import java.util.List; import org.mockito.Mockito; import org.testng.annotations.Test; @@ -44,7 +44,7 @@ public void testGetSuccess() throws Exception { Mockito.eq(Constants.SECRETS_ENTITY_NAME), Mockito.eq(""), Mockito.eq(null), - Mockito.any(SortCriterion.class), + Mockito.any(List.class), Mockito.eq(0), Mockito.eq(20))) .thenReturn( @@ -112,7 +112,7 @@ public void testGetUnauthorized() throws Exception { Mockito.any(), Mockito.eq(""), Mockito.eq(null), - Mockito.any(SortCriterion.class), + Mockito.any(List.class), Mockito.anyInt(), Mockito.anyInt()); } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolverTest.java index 57d96030a32aaa..955188a4e4fed1 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolverTest.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.graphql.resolvers.ingest.source; +import static 
com.linkedin.datahub.graphql.TestUtils.verifyIngestProposal; import static com.linkedin.datahub.graphql.resolvers.ingest.IngestTestUtils.*; import static com.linkedin.metadata.Constants.*; import static org.mockito.ArgumentMatchers.any; @@ -66,13 +67,11 @@ public void testGetSuccess() throws Exception { .setExecutorId(TEST_INPUT.getConfig().getExecutorId()) .setDebugMode(TEST_INPUT.getConfig().getDebugMode())); - Mockito.verify(mockClient, Mockito.times(1)) - .ingestProposal( - any(), - Mockito.eq( - MutationUtils.buildMetadataChangeProposalWithUrn( - TEST_INGESTION_SOURCE_URN, INGESTION_INFO_ASPECT_NAME, info)), - Mockito.eq(false)); + verifyIngestProposal( + mockClient, + 1, + MutationUtils.buildMetadataChangeProposalWithUrn( + TEST_INGESTION_SOURCE_URN, INGESTION_INFO_ASPECT_NAME, info)); } @Test diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/mutate/SiblingsUtilsTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/mutate/SiblingsUtilsTest.java index 5965c8b790a760..e76317391ac34b 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/mutate/SiblingsUtilsTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/mutate/SiblingsUtilsTest.java @@ -31,7 +31,7 @@ public class SiblingsUtilsTest { public void testGetSiblingUrns() { UrnArray siblingUrns = new UrnArray(UrnUtils.getUrn(TEST_DATASET_URN2), UrnUtils.getUrn(TEST_DATASET_URN3)); - EntityService mockService = mock(EntityService.class); + EntityService mockService = mock(EntityService.class); Mockito.when( mockService.getLatestAspect( any(), eq(UrnUtils.getUrn(TEST_DATASET_URN1)), eq(SIBLINGS_ASPECT_NAME))) @@ -45,7 +45,7 @@ public void testGetSiblingUrns() { @Test public void testGetSiblingUrnsWithoutSiblings() { - EntityService mockService = mock(EntityService.class); + EntityService mockService = mock(EntityService.class); Mockito.when( mockService.getLatestAspect( any(), 
eq(UrnUtils.getUrn(TEST_DATASET_URN1)), eq(SIBLINGS_ASPECT_NAME))) @@ -59,7 +59,7 @@ public void testGetSiblingUrnsWithoutSiblings() { @Test public void testGetSiblingUrnsWithSiblingsAspect() { - EntityService mockService = mock(EntityService.class); + EntityService mockService = mock(EntityService.class); Mockito.when( mockService.getLatestAspect( any(), eq(UrnUtils.getUrn(TEST_DATASET_URN1)), eq(SIBLINGS_ASPECT_NAME))) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateUserSettingResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateUserSettingResolverTest.java index ce339fa2c75c7c..313c15c95c952f 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateUserSettingResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateUserSettingResolverTest.java @@ -23,7 +23,7 @@ public class UpdateUserSettingResolverTest { @Test public void testWriteCorpUserSettings() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_USER_URN)), eq(true))) .thenReturn(true); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/operation/ReportOperationResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/operation/ReportOperationResolverTest.java index ad5f39d4b3c6d2..c4778cbbd40535 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/operation/ReportOperationResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/operation/ReportOperationResolverTest.java @@ -57,8 +57,7 @@ public void testGetSuccess() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); resolver.get(mockEnv).get(); 
- Mockito.verify(mockClient, Mockito.times(1)) - .ingestProposal(any(), Mockito.eq(expectedProposal), Mockito.eq(false)); + verifyIngestProposal(mockClient, 1, expectedProposal); } @Test diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java index 40a2e45724381c..b239e0300ffcc5 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java @@ -38,7 +38,7 @@ public class AddOwnersResolverTest { @Test public void testGetSuccessNoExistingOwners() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -101,7 +101,7 @@ public void testGetSuccessNoExistingOwners() throws Exception { @Test public void testGetSuccessExistingOwnerNewType() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); com.linkedin.common.Ownership oldOwnership = new Ownership() @@ -167,7 +167,7 @@ public void testGetSuccessExistingOwnerNewType() throws Exception { @Test public void testGetSuccessDeprecatedTypeToOwnershipType() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); com.linkedin.common.Ownership oldOwnership = new Ownership() @@ -230,7 +230,7 @@ public void testGetSuccessDeprecatedTypeToOwnershipType() throws Exception { @Test public void testGetSuccessMultipleOwnerTypes() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); com.linkedin.common.Ownership oldOwnership = new Ownership() @@ -328,7 +328,7 @@ public void 
testGetSuccessMultipleOwnerTypes() throws Exception { @Test public void testGetFailureOwnerDoesNotExist() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -366,7 +366,7 @@ public void testGetFailureOwnerDoesNotExist() throws Exception { @Test public void testGetFailureResourceDoesNotExist() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -404,7 +404,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { @Test public void testGetUnauthorized() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); AddOwnersResolver resolver = new AddOwnersResolver(mockService); @@ -429,7 +429,7 @@ public void testGetUnauthorized() throws Exception { @Test public void testGetEntityClientException() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.doThrow(RuntimeException.class) .when(mockService) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchAddOwnersResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchAddOwnersResolverTest.java index 2d7b67685cc697..8275f9f83ef83f 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchAddOwnersResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchAddOwnersResolverTest.java @@ -38,7 +38,7 @@ public class BatchAddOwnersResolverTest { @Test public void testGetSuccessNoExistingOwners() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ 
-126,7 +126,7 @@ public void testGetSuccessExistingOwners() throws Exception { new Owner() .setOwner(Urn.createFromString(TEST_OWNER_URN_1)) .setType(OwnershipType.TECHNICAL_OWNER)))); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -217,7 +217,7 @@ public void testGetSuccessExistingOwners() throws Exception { @Test public void testGetFailureOwnerDoesNotExist() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -267,7 +267,7 @@ public void testGetFailureOwnerDoesNotExist() throws Exception { @Test public void testGetFailureResourceDoesNotExist() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -326,7 +326,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { @Test public void testGetUnauthorized() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService); @@ -363,7 +363,7 @@ public void testGetUnauthorized() throws Exception { @Test public void testGetEntityClientException() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.doThrow(RuntimeException.class) .when(mockService) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchRemoveOwnersResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchRemoveOwnersResolverTest.java index 8fcedfa605e1bc..9ea9ac693b98ed 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchRemoveOwnersResolverTest.java +++ 
b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchRemoveOwnersResolverTest.java @@ -35,7 +35,7 @@ public class BatchRemoveOwnersResolverTest { @Test public void testGetSuccessNoExistingOwners() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -83,7 +83,7 @@ public void testGetSuccessNoExistingOwners() throws Exception { @Test public void testGetSuccessExistingOwners() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); final Ownership oldOwners1 = new Ownership() @@ -150,7 +150,7 @@ public void testGetSuccessExistingOwners() throws Exception { @Test public void testGetFailureResourceDoesNotExist() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -195,7 +195,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { @Test public void testGetUnauthorized() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService); @@ -218,7 +218,7 @@ public void testGetUnauthorized() throws Exception { @Test public void testGetEntityClientException() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.doThrow(RuntimeException.class) .when(mockService) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolverTest.java index 70b427a1606f12..ee728b17e8c621 100644 --- 
a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolverTest.java @@ -69,9 +69,10 @@ public void testGetSuccess(final ListQueriesInput input) throws Exception { : input.getQuery()), Mockito.eq(buildFilter(input.getSource(), input.getDatasetUrn())), Mockito.eq( - new SortCriterion() - .setField(ListQueriesResolver.CREATED_AT_FIELD) - .setOrder(SortOrder.DESCENDING)), + Collections.singletonList( + new SortCriterion() + .setField(ListQueriesResolver.CREATED_AT_FIELD) + .setOrder(SortOrder.DESCENDING))), Mockito.eq(input.getStart()), Mockito.eq(input.getCount()))) .thenReturn( diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolverTest.java index 40062ed08977ac..d32eb9fcf120ca 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolverTest.java @@ -324,7 +324,7 @@ public static void testErrorFetchingResults() throws Exception { Mockito.any(), Mockito.anyInt(), Mockito.anyInt(), - Mockito.eq(null), + Mockito.eq(Collections.emptyList()), Mockito.eq(null))) .thenThrow(new RemoteInvocationException()); @@ -397,7 +397,7 @@ private static EntityClient initMockEntityClient( Mockito.eq(filter), Mockito.eq(start), Mockito.eq(limit), - Mockito.eq(null), + Mockito.eq(Collections.emptyList()), Mockito.eq(facets))) .thenReturn(result); return client; @@ -420,7 +420,7 @@ private static void verifyMockEntityClient( Mockito.eq(filter), Mockito.eq(start), Mockito.eq(limit), - Mockito.eq(null), + Mockito.eq(Collections.emptyList()), 
Mockito.eq(facets)); } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/GetQuickFiltersResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/GetQuickFiltersResolverTest.java index 25e374c766deba..64042e82bbfe88 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/GetQuickFiltersResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/GetQuickFiltersResolverTest.java @@ -24,6 +24,7 @@ import com.linkedin.r2.RemoteInvocationException; import graphql.schema.DataFetchingEnvironment; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.concurrent.CompletionException; import java.util.stream.Collectors; @@ -114,7 +115,7 @@ public static void testGetQuickFiltersFailure() throws Exception { Mockito.any(), Mockito.anyInt(), Mockito.anyInt(), - Mockito.eq(null), + Mockito.eq(Collections.emptyList()), Mockito.eq(null))) .thenThrow(new RemoteInvocationException()); @@ -300,7 +301,7 @@ private static EntityClient initMockEntityClient( Mockito.eq(filter), Mockito.eq(start), Mockito.eq(limit), - Mockito.eq(null), + Mockito.eq(Collections.emptyList()), Mockito.eq(null))) .thenReturn(result); return client; diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolverTest.java index bcbfda6c71bba2..30d6f2dc6f2836 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolverTest.java @@ -437,8 +437,8 @@ public static void testApplyViewErrorFetchingView() throws Exception { Mockito.any(), 
Mockito.anyInt(), Mockito.anyInt(), - Mockito.eq(null), - Mockito.eq(null))) + Mockito.eq(Collections.emptyList()), + Mockito.eq(Collections.emptyList()))) .thenThrow(new RemoteInvocationException()); final SearchAcrossEntitiesResolver resolver = @@ -485,7 +485,7 @@ private static EntityClient initMockEntityClient( Mockito.eq(filter), Mockito.eq(start), Mockito.eq(limit), - Mockito.eq(null))) + Mockito.eq(Collections.emptyList()))) .thenReturn(result); return client; } @@ -506,7 +506,7 @@ private static void verifyMockEntityClient( Mockito.eq(filter), Mockito.eq(start), Mockito.eq(limit), - Mockito.eq(null)); + Mockito.eq(Collections.emptyList())); } private static void verifyMockViewService(ViewService mockService, Urn viewUrn) { diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolverTest.java index a5310a052f613c..fbbf5cf314eda3 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolverTest.java @@ -18,6 +18,8 @@ import com.linkedin.metadata.search.SearchResult; import com.linkedin.metadata.search.SearchResultMetadata; import graphql.schema.DataFetchingEnvironment; +import java.util.Collections; +import java.util.List; import org.mockito.Mockito; import org.testng.annotations.Test; @@ -56,7 +58,7 @@ public void testDefaultSearchFlags() throws Exception { Constants.DATASET_ENTITY_NAME, // Verify that merged entity types were used. "", null, - null, + Collections.emptyList(), 0, 10, setConvertSchemaFieldsToDatasets( @@ -97,7 +99,7 @@ public void testOverrideSearchFlags() throws Exception { Constants.DATASET_ENTITY_NAME, // Verify that merged entity types were used. 
"", null, - null, + Collections.emptyList(), 1, 11, setConvertSchemaFieldsToDatasets( @@ -129,7 +131,7 @@ public void testNonWildCardSearchFlags() throws Exception { Constants.DATASET_ENTITY_NAME, // Verify that merged entity types were used. "not a wildcard", null, // Verify that view filter was used. - null, + Collections.emptyList(), 0, 10, setConvertSchemaFieldsToDatasets( @@ -170,7 +172,7 @@ private void verifyMockSearchEntityClient( String entityName, String query, Filter filter, - SortCriterion sortCriterion, + List sortCriteria, int start, int limit, com.linkedin.metadata.query.SearchFlags searchFlags) @@ -181,7 +183,7 @@ private void verifyMockSearchEntityClient( Mockito.eq(entityName), Mockito.eq(query), Mockito.eq(filter), - Mockito.eq(sortCriterion), + Mockito.eq(sortCriteria), Mockito.eq(start), Mockito.eq(limit)); } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/AddTagsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/AddTagsResolverTest.java index 4384d9e2650e4f..ee1d59cdf87c75 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/AddTagsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/AddTagsResolverTest.java @@ -35,7 +35,7 @@ public class AddTagsResolverTest { @Test public void testGetSuccessNoExistingTags() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -94,7 +94,7 @@ public void testGetSuccessExistingTags() throws Exception { ImmutableList.of( new TagAssociation().setTag(TagUrn.createFromString(TEST_TAG_1_URN))))); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -146,7 +146,7 @@ public void testGetSuccessExistingTags() throws Exception { @Test public 
void testGetFailureTagDoesNotExist() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -177,7 +177,7 @@ public void testGetFailureTagDoesNotExist() throws Exception { @Test public void testGetFailureResourceDoesNotExist() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -208,7 +208,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { @Test public void testGetUnauthorized() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); AddTagsResolver resolver = new AddTagsResolver(mockService); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/BatchAddTagsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/BatchAddTagsResolverTest.java index 0d22cd6e5eb2b1..5f6db4cb1e5a56 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/BatchAddTagsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/BatchAddTagsResolverTest.java @@ -39,7 +39,7 @@ public class BatchAddTagsResolverTest { @Test public void testGetSuccessNoExistingTags() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -115,7 +115,7 @@ public void testGetSuccessExistingTags() throws Exception { ImmutableList.of( new TagAssociation().setTag(TagUrn.createFromString(TEST_TAG_1_URN))))); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -184,7 +184,7 @@ public void testGetSuccessExistingTags() throws Exception { @Test 
public void testGetFailureTagDoesNotExist() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -218,7 +218,7 @@ public void testGetFailureTagDoesNotExist() throws Exception { @Test public void testGetFailureResourceDoesNotExist() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -263,7 +263,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { @Test public void testGetUnauthorized() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); BatchAddTagsResolver resolver = new BatchAddTagsResolver(mockService); @@ -286,7 +286,7 @@ public void testGetUnauthorized() throws Exception { @Test public void testGetEntityClientException() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.doThrow(RuntimeException.class) .when(mockService) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/BatchRemoveTagsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/BatchRemoveTagsResolverTest.java index e6c33ea7341788..9f34c0da82744a 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/BatchRemoveTagsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/BatchRemoveTagsResolverTest.java @@ -42,7 +42,7 @@ public class BatchRemoveTagsResolverTest { @Test public void testGetSuccessNoExistingTags() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -104,7 +104,7 @@ public void testGetSuccessNoExistingTags() 
throws Exception { @Test public void testGetSuccessExistingTags() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); final GlobalTags oldTags1 = new GlobalTags() @@ -177,7 +177,7 @@ public void testGetSuccessExistingTags() throws Exception { @Test public void testGetFailureResourceDoesNotExist() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -222,7 +222,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { @Test public void testGetUnauthorized() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); BatchRemoveTagsResolver resolver = new BatchRemoveTagsResolver(mockService); @@ -245,7 +245,7 @@ public void testGetUnauthorized() throws Exception { @Test public void testGetEntityClientException() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.doThrow(RuntimeException.class) .when(mockService) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/CreateTagResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/CreateTagResolverTest.java index cd9ac9f0b610a8..6c8984addb2651 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/CreateTagResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/CreateTagResolverTest.java @@ -27,7 +27,7 @@ public class CreateTagResolverTest { @Test public void testGetSuccess() throws Exception { // Create resolver - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( 
mockClient.ingestProposal( @@ -53,14 +53,13 @@ public void testGetSuccess() throws Exception { key, TAG_ENTITY_NAME, TAG_PROPERTIES_ASPECT_NAME, props); // Not ideal to match against "any", but we don't know the auto-generated execution request id - Mockito.verify(mockClient, Mockito.times(1)) - .ingestProposal(any(), Mockito.eq(proposal), Mockito.eq(false)); + verifyIngestProposal(mockClient, 1, proposal); } @Test public void testGetUnauthorized() throws Exception { // Create resolver - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); EntityClient mockClient = Mockito.mock(EntityClient.class); CreateTagResolver resolver = new CreateTagResolver(mockClient, mockService); @@ -77,7 +76,7 @@ public void testGetUnauthorized() throws Exception { @Test public void testGetEntityClientException() throws Exception { // Create resolver - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.doThrow(RuntimeException.class) .when(mockClient) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/SetTagColorResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/SetTagColorResolverTest.java index 624e300b19f461..92f7a2688b43eb 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/SetTagColorResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/SetTagColorResolverTest.java @@ -38,7 +38,7 @@ public class SetTagColorResolverTest { public void testGetSuccessExistingProperties() throws Exception { // Create resolver EntityClient mockClient = Mockito.mock(EntityClient.class); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); // Test setting the domain final TagProperties oldTagProperties = new 
TagProperties().setName("Test Tag"); @@ -69,8 +69,7 @@ public void testGetSuccessExistingProperties() throws Exception { MutationUtils.buildMetadataChangeProposalWithUrn( UrnUtils.getUrn(TEST_ENTITY_URN), TAG_PROPERTIES_ASPECT_NAME, newTagProperties); - Mockito.verify(mockClient, Mockito.times(1)) - .ingestProposal(any(), Mockito.eq(proposal), Mockito.eq(false)); + verifyIngestProposal(mockClient, 1, proposal); Mockito.verify(mockService, Mockito.times(1)) .exists(any(), Mockito.eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true)); @@ -80,7 +79,7 @@ public void testGetSuccessExistingProperties() throws Exception { public void testGetFailureNoExistingProperties() throws Exception { // Create resolver EntityClient mockClient = Mockito.mock(EntityClient.class); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); // Test setting the domain Mockito.when( @@ -135,7 +134,7 @@ public void testGetFailureTagDoesNotExist() throws Exception { ImmutableMap.of( Constants.TAG_PROPERTIES_ASPECT_NAME, oldTagPropertiesAspect))))); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true))) .thenReturn(false); @@ -155,7 +154,7 @@ public void testGetFailureTagDoesNotExist() throws Exception { public void testGetUnauthorized() throws Exception { // Create resolver EntityClient mockClient = Mockito.mock(EntityClient.class); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); SetTagColorResolver resolver = new SetTagColorResolver(mockClient, mockService); // Execute resolver diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/AddTermsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/AddTermsResolverTest.java index 9eeb525c3657e9..8f8a071ce89329 100644 
--- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/AddTermsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/AddTermsResolverTest.java @@ -32,7 +32,7 @@ public class AddTermsResolverTest { @Test public void testGetSuccessNoExistingTerms() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -82,7 +82,7 @@ public void testGetSuccessExistingTerms() throws Exception { new GlossaryTermAssociation() .setUrn(GlossaryTermUrn.createFromString(TEST_TERM_1_URN))))); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -124,7 +124,7 @@ public void testGetSuccessExistingTerms() throws Exception { @Test public void testGetFailureTermDoesNotExist() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -156,7 +156,7 @@ public void testGetFailureTermDoesNotExist() throws Exception { @Test public void testGetFailureResourceDoesNotExist() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -188,7 +188,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { @Test public void testGetUnauthorized() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); AddTermsResolver resolver = new AddTermsResolver(mockService); @@ -207,7 +207,7 @@ public void testGetUnauthorized() throws Exception { @Test public void testGetEntityClientException() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); 
Mockito.doThrow(RuntimeException.class) .when(mockService) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/BatchAddTermsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/BatchAddTermsResolverTest.java index 185bb5cc97953a..ced9e371814f7e 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/BatchAddTermsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/BatchAddTermsResolverTest.java @@ -35,7 +35,7 @@ public class BatchAddTermsResolverTest { @Test public void testGetSuccessNoExistingTerms() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -99,7 +99,7 @@ public void testGetSuccessExistingTerms() throws Exception { new GlossaryTermAssociation() .setUrn(GlossaryTermUrn.createFromString(TEST_GLOSSARY_TERM_1_URN))))); - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -155,7 +155,7 @@ public void testGetSuccessExistingTerms() throws Exception { @Test public void testGetFailureTagDoesNotExist() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -189,7 +189,7 @@ public void testGetFailureTagDoesNotExist() throws Exception { @Test public void testGetFailureResourceDoesNotExist() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -234,7 +234,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { @Test public void testGetUnauthorized() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = 
getMockEntityService(); BatchAddTermsResolver resolver = new BatchAddTermsResolver(mockService); @@ -256,7 +256,7 @@ public void testGetUnauthorized() throws Exception { @Test public void testGetEntityClientException() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.doThrow(RuntimeException.class) .when(mockService) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/BatchRemoveTermsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/BatchRemoveTermsResolverTest.java index 9e269bed436d98..254a301159ac25 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/BatchRemoveTermsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/BatchRemoveTermsResolverTest.java @@ -35,7 +35,7 @@ public class BatchRemoveTermsResolverTest { @Test public void testGetSuccessNoExistingTerms() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -82,7 +82,7 @@ public void testGetSuccessNoExistingTerms() throws Exception { @Test public void testGetSuccessExistingTerms() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); final GlossaryTerms oldTerms1 = new GlossaryTerms() @@ -148,7 +148,7 @@ public void testGetSuccessExistingTerms() throws Exception { @Test public void testGetFailureResourceDoesNotExist() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.when( mockService.getAspect( @@ -192,7 +192,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { @Test public void testGetUnauthorized() throws Exception { - EntityService mockService = 
getMockEntityService(); + EntityService mockService = getMockEntityService(); BatchRemoveTermsResolver resolver = new BatchRemoveTermsResolver(mockService); @@ -214,7 +214,7 @@ public void testGetUnauthorized() throws Exception { @Test public void testGetEntityClientException() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.doThrow(RuntimeException.class) .when(mockService) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/utils/OwnerUtilsTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/utils/OwnerUtilsTest.java index b4097d9dd045df..d524d8bfb9a6b3 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/utils/OwnerUtilsTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/utils/OwnerUtilsTest.java @@ -59,6 +59,7 @@ public void testIsOwnerEqualOnlyOwnershipTypeUrn() throws URISyntaxException { Urn technicalOwnershipTypeUrn = new Urn(TECHNICAL_OWNER_OWNERSHIP_TYPE_URN); Urn businessOwnershipTypeUrn = new Urn(BUSINESS_OWNER_OWNERSHIP_TYPE_URN); Urn ownerUrn1 = new Urn("urn:li:corpuser:foo"); + Urn ownerUrn2 = new Urn("urn:li:corpuser:bar"); Owner ownerWithTechnicalOwnership = new Owner(); ownerWithTechnicalOwnership.setOwner(ownerUrn1); @@ -72,12 +73,17 @@ public void testIsOwnerEqualOnlyOwnershipTypeUrn() throws URISyntaxException { ownerWithoutOwnershipType.setOwner(ownerUrn1); ownerWithoutOwnershipType.setType(OwnershipType.NONE); + Owner owner2WithoutOwnershipType = new Owner(); + owner2WithoutOwnershipType.setOwner(ownerUrn2); + owner2WithoutOwnershipType.setType(OwnershipType.NONE); + assertTrue( OwnerUtils.isOwnerEqual(ownerWithTechnicalOwnership, ownerUrn1, technicalOwnershipTypeUrn)); assertFalse( OwnerUtils.isOwnerEqual(ownerWithBusinessOwnership, ownerUrn1, technicalOwnershipTypeUrn)); - assertFalse(OwnerUtils.isOwnerEqual(ownerWithTechnicalOwnership, ownerUrn1, null)); + 
assertTrue(OwnerUtils.isOwnerEqual(ownerWithTechnicalOwnership, ownerUrn1, null)); assertTrue(OwnerUtils.isOwnerEqual(ownerWithoutOwnershipType, ownerUrn1, null)); + assertFalse(OwnerUtils.isOwnerEqual(owner2WithoutOwnershipType, ownerUrn1, null)); } public void testIsOwnerEqualWithBothLegacyAndNewType() throws URISyntaxException { diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/RestoreIndicesConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/RestoreIndicesConfig.java index 949b75edaa6ba0..26e40485787e90 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/RestoreIndicesConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/RestoreIndicesConfig.java @@ -8,37 +8,23 @@ import io.ebean.Database; import javax.annotation.Nonnull; import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; -import org.springframework.context.ApplicationContext; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -import org.springframework.context.annotation.DependsOn; @Slf4j @Configuration public class RestoreIndicesConfig { - @Autowired ApplicationContext applicationContext; @Bean(name = "restoreIndices") - @DependsOn({ - "ebeanServer", - "entityService", - "systemMetadataService", - "searchService", - "graphService" - }) @ConditionalOnProperty(name = "entityService.impl", havingValue = "ebean", matchIfMissing = true) @Nonnull - public RestoreIndices createInstance() { - final Database ebeanServer = applicationContext.getBean(Database.class); - final EntityService entityService = applicationContext.getBean(EntityService.class); - final SystemMetadataService systemMetadataService = - applicationContext.getBean(SystemMetadataService.class); - final EntitySearchService entitySearchService = - 
applicationContext.getBean(EntitySearchService.class); - final GraphService graphService = applicationContext.getBean(GraphService.class); - + public RestoreIndices createInstance( + final Database ebeanServer, + final EntityService entityService, + final EntitySearchService entitySearchService, + final GraphService graphService, + final SystemMetadataService systemMetadataService) { return new RestoreIndices( ebeanServer, entityService, systemMetadataService, entitySearchService, graphService); } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/browsepaths/BackfillBrowsePathsV2Step.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/browsepaths/BackfillBrowsePathsV2Step.java index 09b27fbcfbdf29..a1d559d05ad2fb 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/browsepaths/BackfillBrowsePathsV2Step.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/browsepaths/BackfillBrowsePathsV2Step.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.upgrade.system.browsepaths; import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.SystemMetadataUtils.createDefaultSystemMetadata; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; @@ -29,7 +30,6 @@ import com.linkedin.metadata.search.SearchService; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.mxe.MetadataChangeProposal; -import com.linkedin.mxe.SystemMetadata; import io.datahubproject.metadata.context.OperationContext; import java.util.Set; import java.util.function.Function; @@ -208,8 +208,7 @@ private void ingestBrowsePathsV2( proposal.setEntityType(urn.getEntityType()); proposal.setAspectName(Constants.BROWSE_PATHS_V2_ASPECT_NAME); proposal.setChangeType(ChangeType.UPSERT); - proposal.setSystemMetadata( - new SystemMetadata().setRunId(DEFAULT_RUN_ID).setLastObserved(System.currentTimeMillis())); + 
proposal.setSystemMetadata(createDefaultSystemMetadata()); proposal.setAspect(GenericRecordUtils.serializeAspect(browsePathsV2)); entityService.ingestProposal(opContext, proposal, auditStamp, true); } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/policyfields/BackfillPolicyFieldsStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/policyfields/BackfillPolicyFieldsStep.java index 93bf8cc5b9b60e..c65a45aefc357f 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/policyfields/BackfillPolicyFieldsStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/policyfields/BackfillPolicyFieldsStep.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.upgrade.system.policyfields; import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.SystemMetadataUtils.createDefaultSystemMetadata; import com.google.common.collect.ImmutableList; import com.linkedin.common.AuditStamp; @@ -25,7 +26,6 @@ import com.linkedin.metadata.search.ScrollResult; import com.linkedin.metadata.search.SearchEntity; import com.linkedin.metadata.search.SearchService; -import com.linkedin.mxe.SystemMetadata; import com.linkedin.policy.DataHubPolicyInfo; import io.datahubproject.metadata.context.OperationContext; import java.net.URISyntaxException; @@ -234,9 +234,7 @@ private Optional> ingestPolicyFields( null, infoAspect, null, - new SystemMetadata() - .setRunId(DEFAULT_RUN_ID) - .setLastObserved(System.currentTimeMillis()), + createDefaultSystemMetadata(), auditStamp, ChangeType.RESTATE) .getFirst()); diff --git a/datahub-web-react/.eslintrc.js b/datahub-web-react/.eslintrc.js index 5627283af1af1c..3fdf7b6a3042ca 100644 --- a/datahub-web-react/.eslintrc.js +++ b/datahub-web-react/.eslintrc.js @@ -48,7 +48,7 @@ module.exports = { ], 'vitest/prefer-to-be': 'off', '@typescript-eslint/no-use-before-define': ['error', { functions: false, classes: false }], - 
'react-refresh/only-export-components': ['warn', { 'allowConstantExport': true }], + 'react-refresh/only-export-components': ['warn', { allowConstantExport: true }], }, settings: { react: { diff --git a/datahub-web-react/README.md b/datahub-web-react/README.md index 560f5315b2c71f..86bbb349b027c4 100644 --- a/datahub-web-react/README.md +++ b/datahub-web-react/README.md @@ -1,44 +1,47 @@ --- -title: "datahub-web-react" +title: 'datahub-web-react' --- # DataHub React App ## About -This module contains a React application that serves as the DataHub UI. -Feel free to take a look around, deploy, and contribute. +This module contains a React application that serves as the DataHub UI. +Feel free to take a look around, deploy, and contribute. ## Functional Goals + The initial milestone for the app was to achieve functional parity with the previous Ember app. This meant supporting -- Dataset Profiles, Search, Browse Experience -- User Profiles, Search -- LDAP Authentication Flow +- Dataset Profiles, Search, Browse Experience +- User Profiles, Search +- LDAP Authentication Flow -This has since been achieved. The new set of functional goals are reflected in the latest version of the [DataHub Roadmap](../docs/roadmap.md). +This has since been achieved. The new set of functional goals are reflected in the latest version of the [DataHub Roadmap](../docs/roadmap.md). ## Design Goals + In building out the client experience, we intend to leverage learnings from the previous Ember-based app and incorporate feedback gathered from organizations operating DataHub. Two themes have emerged to serve as guideposts: -1. **Configurability**: The client experience should be configurable, such that deploying organizations can tailor certain - aspects to their needs. This includes theme / styling configurability, showing and hiding specific functionality, - customizing copy & logos, etc. - -2. **Extensibility**: Extending the *functionality* of DataHub should be as simple as possible. 
Making changes like - extending an existing entity & adding a new entity should require minimal effort and should be well covered in detailed - documentation. +1. **Configurability**: The client experience should be configurable, such that deploying organizations can tailor certain + aspects to their needs. This includes theme / styling configurability, showing and hiding specific functionality, + customizing copy & logos, etc. +2. **Extensibility**: Extending the _functionality_ of DataHub should be as simple as possible. Making changes like + extending an existing entity & adding a new entity should require minimal effort and should be well covered in detailed + documentation. ## Starting the Application ### Quick Start Navigate to the `docker` directory and run the following to spin up the react app: + ``` ./quickstart.sh ``` + at `http://localhost:9002`. If you want to make changes to the UI see them live without having to rebuild the `datahub-frontend-react` docker image, you @@ -54,8 +57,9 @@ Optionally you could also start the app with the mock server without running the ### Testing your customizations There is two options to test your customizations: -* **Option 1**: Initialize the docker containers with the `quickstart.sh` script (or if any custom docker-compose file) and then run `yarn start` in this directory. This will start a forwarding server at `localhost:3000` that will use the `datahub-frontend` server at `http://localhost:9002` to fetch real data. -* **Option 2**: Change the environment variable `REACT_APP_PROXY_TARGET` in the `.env` file to point to your `datahub-frontend` server (ex: https://my_datahub_host.com) and then run `yarn start` in this directory. This will start a forwarding server at `localhost:3000` that will use the `datahub-frontend` server at some domain to fetch real data. 
+ +- **Option 1**: Initialize the docker containers with the `quickstart.sh` script (or if any custom docker-compose file) and then run `yarn start` in this directory. This will start a forwarding server at `localhost:3000` that will use the `datahub-frontend` server at `http://localhost:9002` to fetch real data. +- **Option 2**: Change the environment variable `REACT_APP_PROXY_TARGET` in the `.env` file to point to your `datahub-frontend` server (ex: https://my_datahub_host.com) and then run `yarn start` in this directory. This will start a forwarding server at `localhost:3000` that will use the `datahub-frontend` server at some domain to fetch real data. The option 2 is useful if you want to test your React customizations without having to run the hole DataHub stack locally. However, if you changed other components of the DataHub stack, you will need to run the hole stack locally (building the docker images) and use the option 1. @@ -68,10 +72,10 @@ In order to start a server and run frontend unit tests using react-testing-frame There are also more automated tests using Cypress in the `smoke-test` folder of the repository root. #### Troubleshooting + `Error: error:0308010C:digital envelope routines::unsupported`: This error message shows up when using Node 17, due to an OpenSSL update related to md5. The best workaround is to revert to the Active LTS version of Node, 16.13.0 with the command `nvm install 16.13.0` and if necessary reinstall yarn `npm install --global yarn`. - ### Theming #### Customizing your App without rebuilding assets @@ -108,74 +112,74 @@ you to terminate and re-run `yarn start` to see updated styles. The `src` dir of the app is broken down into the following modules -**conf** - Stores global configuration flags that can be referenced across the app. For example, the number of +**conf** - Stores global configuration flags that can be referenced across the app. 
For example, the number of search results shown per page, or the placeholder text in the search bar box. It serves as a location where levels -for functional configurability should reside. +for functional configurability should reside. **app** - Contains all important components of the app. It has a few sub-modules: -- `auth`: Components used to render the user authentication experience. -- `browse`: Shared components used to render the 'browse-by-path' experience. The experience is akin to navigating a filesystem hierarchy. -- `preview`: Shared components used to render Entity 'preview' views. These can appear in search results, browse results, - and within entity profile pages. -- `search`: Shared components used to render the full-text search experience. -- `shared`: Misc. shared components -- `entity`: Contains Entity definitions, where entity-specific functionality resides. - Configuration is provided by implementing the 'Entity' interface. (See DatasetEntity.tsx for example) - There are 2 visual components each entity should supply: - - `profiles`: display relevant details about an individual entity. This serves as the entity's 'profile'. - - `previews`: provide a 'preview', or a smaller details card, containing the most important information about an entity instance. - - When rendering a preview, the entity's data and the type of preview (SEARCH, BROWSE, PREVIEW) are provided. This +- `auth`: Components used to render the user authentication experience. +- `browse`: Shared components used to render the 'browse-by-path' experience. The experience is akin to navigating a filesystem hierarchy. +- `preview`: Shared components used to render Entity 'preview' views. These can appear in search results, browse results, + and within entity profile pages. +- `search`: Shared components used to render the full-text search experience. +- `shared`: Misc. shared components +- `entity`: Contains Entity definitions, where entity-specific functionality resides. 
+ Configuration is provided by implementing the 'Entity' interface. (See DatasetEntity.tsx for example) + There are 2 visual components each entity should supply: + + - `profiles`: display relevant details about an individual entity. This serves as the entity's 'profile'. + - `previews`: provide a 'preview', or a smaller details card, containing the most important information about an entity instance. + + When rendering a preview, the entity's data and the type of preview (SEARCH, BROWSE, PREVIEW) are provided. This allows you to optionally customize the way an entities preview is rendered in different views. - - - `entity registry`: There's another very important piece of code living within this module: the **EntityRegistry**. This is a layer + + - `entity registry`: There's another very important piece of code living within this module: the **EntityRegistry**. This is a layer of abstraction over the intimate details of rendering a particular entity. It is used to render a view associated with a particular entity type (user, dataset, etc.). - - +

-**graphql** - The React App talks to the `dathub-frontend` server using GraphQL. This module is where the *queries* issued -against the server are defined. Once defined, running `yarn run generate` will code-gen TypeScript objects to make invoking +**graphql** - The React App talks to the `datahub-frontend` server using GraphQL. This module is where the _queries_ issued +against the server are defined. Once defined, running `yarn run generate` will code-gen TypeScript objects to make invoking these queries extremely easy. An example can be found at the top of `SearchPage.tsx.` -**images** - Images to be displayed within the app. This is where one would place a custom logo image. +**images** - Images to be displayed within the app. This is where one would place a custom logo image. ## Adding an Entity The following outlines a series of steps required to introduce a new entity into the React app: -1. Declare the GraphQL Queries required to display the new entity - - If search functionality should be supported, extend the "search" query within `search.graphql` to fetch the new +1. Declare the GraphQL Queries required to display the new entity + + - If search functionality should be supported, extend the "search" query within `search.graphql` to fetch the new + entity data. + - If browse functionality should be supported, extend the "browse" query within `browse.graphql` to fetch the new entity data. - - If browse functionality should be supported, extend the "browse" query within `browse.graphql` to fetch the new - entity data. - - If display a 'profile' should be supported (most often), introduce a new `.graphql` file that contains a - `get` query to fetch the entity by primary key (urn). - - Note that your new entity *must* implement the `Entity` GraphQL type interface, and thus must have a corresponding - `EntityType`. - - -2. 
Implement the `Entity` interface + - If displaying a 'profile' should be supported (most often), introduce a new `.graphql` file that contains a + `get` query to fetch the entity by primary key (urn). + + Note that your new entity _must_ implement the `Entity` GraphQL type interface, and thus must have a corresponding + `EntityType`. + +2. Implement the `Entity` interface + - Create a new folder under `src/components/entity` corresponding to your entity - Create a class that implements the `Entity` interface (example: `DatasetEntity.tsx`) - - Provide an implementation each method defined on the interface. - - This class specifies whether your new entity should be searchable & browsable, defines the names used to - identify your entity when instances are rendered in collection / when entity appears - in the URL path, and provides the ability to render your entity given data returned by the GQL API. - + - Provide an implementation of each method defined on the interface. + - This class specifies whether your new entity should be searchable & browsable, defines the names used to + identify your entity when instances are rendered in collection / when entity appears + in the URL path, and provides the ability to render your entity given data returned by the GQL API. 3. Register the new entity in the `EntityRegistry` - - Update `App.tsx` to register an instance of your new entity. Now your entity will be accessible via the registry + - Update `App.tsx` to register an instance of your new entity. Now your entity will be accessible via the registry and appear in the UI. To manually retrieve the info about your entity or others, simply use an instance - of the `EntityRegistry`, which is provided via `ReactContext` to *all* components in the hierarchy. + of the `EntityRegistry`, which is provided via `ReactContext` to _all_ components in the hierarchy. For example - ``` - entityRegistry.getCollectionName(EntityType.YOUR_NEW_ENTITY) - ``` - -That's it! 
For any questions, do not hesitate to reach out on the DataHub Slack community in #datahub-react. + ``` + entityRegistry.getCollectionName(EntityType.YOUR_NEW_ENTITY) + ``` + +That's it! For any questions, do not hesitate to reach out on the DataHub Slack community in #datahub-react. diff --git a/datahub-web-react/src/app/analytics/event.ts b/datahub-web-react/src/app/analytics/event.ts index d63b731c720426..c3a57830b8c504 100644 --- a/datahub-web-react/src/app/analytics/event.ts +++ b/datahub-web-react/src/app/analytics/event.ts @@ -191,6 +191,7 @@ export interface SearchResultClickEvent extends BaseEvent { entityTypeFilter?: EntityType; index: number; total: number; + pageNumber: number; } export interface SearchFiltersClearAllEvent extends BaseEvent { diff --git a/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx b/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx index c30fee7abc0b6d..21ae085832cb3f 100644 --- a/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx +++ b/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx @@ -220,6 +220,7 @@ export class DatasetEntity implements Entity { }, ]} sidebarSections={this.getSidebarSections()} + isNameEditable /> ); @@ -283,7 +284,7 @@ export class DatasetEntity implements Entity { return ( { return ( { }; displayName = (data: Dataset) => { - return data?.properties?.name || data.name || data.urn; + return data?.editableProperties?.name || data?.properties?.name || data.name || data.urn; }; platformLogoUrl = (data: Dataset) => { diff --git a/datahub-web-react/src/app/entity/dataset/profile/schema/components/SchemaDescriptionField.tsx b/datahub-web-react/src/app/entity/dataset/profile/schema/components/SchemaDescriptionField.tsx index ce8d03fbdc9602..e7d986028d4a66 100644 --- a/datahub-web-react/src/app/entity/dataset/profile/schema/components/SchemaDescriptionField.tsx +++ b/datahub-web-react/src/app/entity/dataset/profile/schema/components/SchemaDescriptionField.tsx @@ -5,6 +5,8 @@ 
import styled from 'styled-components'; import { FetchResult } from '@apollo/client'; import { UpdateDatasetMutation } from '../../../../../../graphql/dataset.generated'; +import { StringMapEntry } from '../../../../../../types.generated'; +import PropagationDetails from '../../../../shared/propagation/PropagationDetails'; import UpdateDescriptionModal from '../../../../shared/components/legacy/DescriptionModal'; import StripMarkdownText, { removeMarkdown } from '../../../../shared/components/styled/StripMarkdownText'; import SchemaEditableContext from '../../../../../shared/SchemaEditableContext'; @@ -28,6 +30,11 @@ const ExpandedActions = styled.div` height: 10px; `; +const DescriptionWrapper = styled.span` + display: inline-flex; + align-items: center; +`; + const DescriptionContainer = styled.div` position: relative; display: flex; @@ -105,6 +112,8 @@ type Props = { isEdited?: boolean; isReadOnly?: boolean; businessAttributeDescription?: string; + isPropagated?: boolean; + sourceDetail?: StringMapEntry[] | null; }; const ABBREVIATED_LIMIT = 80; @@ -120,6 +129,8 @@ export default function DescriptionField({ original, isReadOnly, businessAttributeDescription, + isPropagated, + sourceDetail, }: Props) { const [showAddModal, setShowAddModal] = useState(false); const overLimit = removeMarkdown(description).length > 80; @@ -163,7 +174,7 @@ export default function DescriptionField({ return ( - {expanded || !overLimit ? ( + {expanded ? 
( <> {!!description && } {!!description && (EditButton || overLimit) && ( @@ -184,25 +195,29 @@ export default function DescriptionField({ ) : ( <> - - { - e.stopPropagation(); - handleExpanded(true); - }} - > - Read More - - - } - suffix={EditButton} - shouldWrap - > - {description} - + + {isPropagated && } +   + + { + e.stopPropagation(); + handleExpanded(true); + }} + > + Read More + + + } + suffix={EditButton} + shouldWrap + > + {description} + + )} {isEdited && (edited)} diff --git a/datahub-web-react/src/app/entity/shared/components/legacy/DescriptionModal.tsx b/datahub-web-react/src/app/entity/shared/components/legacy/DescriptionModal.tsx index 0e899bc391e0a7..2d65a305b4cc8b 100644 --- a/datahub-web-react/src/app/entity/shared/components/legacy/DescriptionModal.tsx +++ b/datahub-web-react/src/app/entity/shared/components/legacy/DescriptionModal.tsx @@ -19,16 +19,29 @@ const StyledViewer = styled(Editor)` } `; +const OriginalDocumentation = styled(Form.Item)` + margin-bottom: 0; +`; + type Props = { title: string; description?: string | undefined; original?: string | undefined; + propagatedDescription?: string | undefined; onClose: () => void; onSubmit: (description: string) => void; isAddDesc?: boolean; }; -export default function UpdateDescriptionModal({ title, description, original, onClose, onSubmit, isAddDesc }: Props) { +export default function UpdateDescriptionModal({ + title, + description, + original, + propagatedDescription, + onClose, + onSubmit, + isAddDesc, +}: Props) { const [updatedDesc, setDesc] = useState(description || original || ''); const handleEditorKeyDown = (event: React.KeyboardEvent) => { @@ -72,9 +85,14 @@ export default function UpdateDescriptionModal({ title, description, original, o /> {!isAddDesc && description && original && ( - Original:}> + Original:}> - + + )} + {!isAddDesc && description && propagatedDescription && ( + Propagated:}> + + )} diff --git 
a/datahub-web-react/src/app/entity/shared/containers/profile/__tests__/EntityHeader.test.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/__tests__/EntityHeader.test.tsx index db347d4f1cc54c..ec6a91df9019ab 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/__tests__/EntityHeader.test.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/__tests__/EntityHeader.test.tsx @@ -3,13 +3,14 @@ import { EntityType } from '../../../../../../types.generated'; import { getCanEditName } from '../header/EntityHeader'; describe('getCanEditName', () => { - const entityDataWithManagePrivileges = { privileges: { canManageEntity: true } }; - const entityDataWithoutManagePrivileges = { privileges: { canManageEntity: false } }; + const entityDataWithManagePrivileges = { privileges: { canManageEntity: true, canEditProperties: true } }; + const entityDataWithoutManagePrivileges = { privileges: { canManageEntity: false, canEditProperties: false } }; it('should return true for Terms if manageGlossaries privilege is true', () => { const canEditName = getCanEditName( EntityType.GlossaryTerm, entityDataWithoutManagePrivileges, + true, platformPrivileges, ); @@ -21,6 +22,7 @@ describe('getCanEditName', () => { const canEditName = getCanEditName( EntityType.GlossaryTerm, entityDataWithoutManagePrivileges, + true, privilegesWithoutGlossaries, ); @@ -32,6 +34,7 @@ describe('getCanEditName', () => { const canEditName = getCanEditName( EntityType.GlossaryTerm, entityDataWithManagePrivileges, + true, privilegesWithoutGlossaries, ); @@ -42,6 +45,7 @@ describe('getCanEditName', () => { const canEditName = getCanEditName( EntityType.GlossaryNode, entityDataWithoutManagePrivileges, + true, platformPrivileges, ); @@ -53,6 +57,7 @@ describe('getCanEditName', () => { const canEditName = getCanEditName( EntityType.GlossaryNode, entityDataWithoutManagePrivileges, + true, privilegesWithoutGlossaries, ); @@ -64,6 +69,7 @@ describe('getCanEditName', () => { 
const canEditName = getCanEditName( EntityType.GlossaryNode, entityDataWithManagePrivileges, + true, privilegesWithoutGlossaries, ); @@ -71,7 +77,12 @@ describe('getCanEditName', () => { }); it('should return true for Domains if manageDomains privilege is true', () => { - const canEditName = getCanEditName(EntityType.Domain, entityDataWithoutManagePrivileges, platformPrivileges); + const canEditName = getCanEditName( + EntityType.Domain, + entityDataWithoutManagePrivileges, + true, + platformPrivileges, + ); expect(canEditName).toBe(true); }); @@ -81,6 +92,7 @@ describe('getCanEditName', () => { const canEditName = getCanEditName( EntityType.Domain, entityDataWithoutManagePrivileges, + true, privilegesWithoutDomains, ); @@ -88,7 +100,40 @@ describe('getCanEditName', () => { }); it('should return false for an unsupported entity', () => { - const canEditName = getCanEditName(EntityType.Chart, entityDataWithManagePrivileges, platformPrivileges); + const canEditName = getCanEditName(EntityType.Chart, entityDataWithManagePrivileges, true, platformPrivileges); + + expect(canEditName).toBe(false); + }); + + it('should return true for a dataset if canEditProperties is true', () => { + const canEditName = getCanEditName( + EntityType.Dataset, + entityDataWithManagePrivileges, + true, + platformPrivileges, + ); + + expect(canEditName).toBe(true); + }); + + it('should return false for a dataset if canEditProperties is false', () => { + const canEditName = getCanEditName( + EntityType.Dataset, + entityDataWithoutManagePrivileges, + true, + platformPrivileges, + ); + + expect(canEditName).toBe(false); + }); + + it('should return false for a dataset if isEditableDatasetNameEnabled is false', () => { + const canEditName = getCanEditName( + EntityType.Dataset, + entityDataWithManagePrivileges, + false, + platformPrivileges, + ); expect(canEditName).toBe(false); }); diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityHeader.tsx 
b/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityHeader.tsx index 09fa23dbc9f57c..12fa9131f33c73 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityHeader.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityHeader.tsx @@ -17,6 +17,7 @@ import { capitalizeFirstLetterOnly } from '../../../../../shared/textUtil'; import { useUserContext } from '../../../../../context/useUserContext'; import { useEntityRegistry } from '../../../../../useEntityRegistry'; import EntityHeaderLoadingSection from './EntityHeaderLoadingSection'; +import { useIsEditableDatasetNameEnabled } from '../../../../../useAppConfig'; const TitleWrapper = styled.div` display: flex; @@ -59,6 +60,7 @@ const TopButtonsWrapper = styled.div` export function getCanEditName( entityType: EntityType, entityData: GenericEntityProperties | null, + isEditableDatasetNameEnabled: boolean, privileges?: PlatformPrivileges, ) { switch (entityType) { @@ -71,6 +73,8 @@ export function getCanEditName( return true; // TODO: add permissions for data products case EntityType.BusinessAttribute: return privileges?.manageBusinessAttributes; + case EntityType.Dataset: + return isEditableDatasetNameEnabled && entityData?.privileges?.canEditProperties; default: return false; } @@ -94,8 +98,15 @@ export const EntityHeader = ({ headerDropdownItems, headerActionItems, isNameEdi const entityName = entityData?.name; const subType = capitalizeFirstLetterOnly(entityData?.subTypes?.typeNames?.[0]) || undefined; + const isEditableDatasetNameEnabled = useIsEditableDatasetNameEnabled(); const canEditName = - isNameEditable && getCanEditName(entityType, entityData, me?.platformPrivileges as PlatformPrivileges); + isNameEditable && + getCanEditName( + entityType, + entityData, + isEditableDatasetNameEnabled, + me?.platformPrivileges as PlatformPrivileges, + ); const entityRegistry = useEntityRegistry(); return ( @@ -106,7 +117,7 @@ export const 
EntityHeader = ({ headerDropdownItems, headerActionItems, isNameEdi <> - + {entityData?.deprecation?.deprecated && ( ({ sidebarSections, topSection }: Props) => { return ( <> {topSection && } - {entityData?.lastIngested && ( + {!!entityData?.lastIngested && ( diff --git a/datahub-web-react/src/app/entity/shared/propagation/PropagationDetails.tsx b/datahub-web-react/src/app/entity/shared/propagation/PropagationDetails.tsx new file mode 100644 index 00000000000000..646f47134938c4 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/propagation/PropagationDetails.tsx @@ -0,0 +1,109 @@ +import React from 'react'; +import styled from 'styled-components'; +import { Popover } from 'antd'; +import { StringMapEntry } from '../../../../types.generated'; +import PropagationEntityLink from './PropagationEntityLink'; +import { usePropagationDetails } from './utils'; +import { PropagateThunderbolt, PropagateThunderboltFilled } from './PropagationIcon'; + +const PopoverWrapper = styled.div` + display: flex; + flex-direction: column; +`; + +const PopoverTitle = styled.div` + font-weight: bold; + font-size: 14px; + padding: 6px 0px; + color: #eeecfa; +`; + +const PopoverDescription = styled.div` + max-width: 340px; + font-size: 14px; + color: #eeecfa; + display: inline; + padding: 0px 0px 8px 0px; +`; + +const PopoverAttributes = styled.div` + display: flex; +`; + +const PopoverAttribute = styled.div` + margin-right: 12px; + margin-bottom: 4px; +`; + +const PopoverAttributeTitle = styled.div` + font-size: 14px; + color: #eeecfa; + font-weight: bold; + margin: 8px 0px; + overflow: hidden; + text-overflow: ellipsis; +`; + +const PopoverDocumentation = styled.a` + margin-top: 12px; +`; + +interface Props { + sourceDetail?: StringMapEntry[] | null; +} + +export default function PropagationDetails({ sourceDetail }: Props) { + const { + isPropagated, + origin: { entity: originEntity }, + via: { entity: viaEntity }, + } = usePropagationDetails(sourceDetail); + + if (!sourceDetail 
|| !isPropagated) return null; + + const popoverContent = + originEntity || viaEntity ? ( + + + This description was automatically propagated from an upstream column.{' '} + + Learn more + + + + {originEntity && originEntity.urn !== viaEntity?.urn && ( + + Origin + + + )} + {viaEntity && ( + + Via + + + )} + + + ) : undefined; + + return ( + + + Propagated Description + + } + content={popoverContent} + > + + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/propagation/PropagationEntityLink.tsx b/datahub-web-react/src/app/entity/shared/propagation/PropagationEntityLink.tsx new file mode 100644 index 00000000000000..8c1285dd5808b1 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/propagation/PropagationEntityLink.tsx @@ -0,0 +1,56 @@ +import React from 'react'; +import styled from 'styled-components'; +import { Link } from 'react-router-dom'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { Entity, EntityType, SchemaFieldEntity } from '../../../../types.generated'; +import { GenericEntityProperties } from '../types'; + +const PreviewImage = styled.img<{ size: number }>` + height: ${(props) => props.size}px; + width: ${(props) => props.size}px; + min-width: ${(props) => props.size}px; + object-fit: contain; + background-color: transparent; + margin: 0px 4px 0px 0px; +`; + +const StyledLink = styled(Link)` + margin-right: 4px; + display: flex; + align-items: center; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +`; + +interface Props { + entity: Entity; +} + +export default function PropagationEntityLink({ entity }: Props) { + const entityRegistry = useEntityRegistry(); + + const isSchemaField = entity.type === EntityType.SchemaField; + const baseEntity = isSchemaField ? 
(entity as SchemaFieldEntity).parent : entity; + + const logoUrl = (baseEntity as GenericEntityProperties)?.platform?.properties?.logoUrl || ''; + let entityUrl = entityRegistry.getEntityUrl(baseEntity.type, baseEntity.urn); + let entityDisplayName = entityRegistry.getDisplayName(baseEntity.type, baseEntity); + + if (isSchemaField) { + entityUrl = `${entityUrl}/${encodeURIComponent('Columns')}?schemaFilter=${encodeURIComponent( + (entity as SchemaFieldEntity).fieldPath, + )}`; + const schemaFieldName = entityRegistry.getDisplayName(entity.type, entity); + entityDisplayName = `${entityDisplayName}.${schemaFieldName}`; + } + + return ( + <> + + + {entityDisplayName} + + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/propagation/PropagationIcon.tsx b/datahub-web-react/src/app/entity/shared/propagation/PropagationIcon.tsx new file mode 100644 index 00000000000000..01b4570c4ca0df --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/propagation/PropagationIcon.tsx @@ -0,0 +1,22 @@ +import styled from 'styled-components'; +import { ThunderboltFilled } from '@ant-design/icons'; +import { REDESIGN_COLORS } from '../constants'; + +export const PropagateThunderbolt = styled(ThunderboltFilled)` + && { + color: #a7c7fa; + } + font-size: 16px; + &:hover { + color: ${REDESIGN_COLORS.BLUE}; + } + margin-right: 4px; +`; + +export const PropagateThunderboltFilled = styled(ThunderboltFilled)` + && { + color: ${REDESIGN_COLORS.BLUE}; + } + font-size: 16px; + margin-right: 4px; +`; diff --git a/datahub-web-react/src/app/entity/shared/propagation/utils.ts b/datahub-web-react/src/app/entity/shared/propagation/utils.ts new file mode 100644 index 00000000000000..d8b4d4d931f4ee --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/propagation/utils.ts @@ -0,0 +1,24 @@ +import { StringMapEntry } from '../../../../types.generated'; +import { useGetEntities } from '../useGetEntities'; + +export function usePropagationDetails(sourceDetail?: StringMapEntry[] | null) { 
+ const isPropagated = !!sourceDetail?.find((mapEntry) => mapEntry.key === 'propagated' && mapEntry.value === 'true'); + const originEntityUrn = sourceDetail?.find((mapEntry) => mapEntry.key === 'origin')?.value || ''; + const viaEntityUrn = sourceDetail?.find((mapEntry) => mapEntry.key === 'via')?.value || ''; + + const entities = useGetEntities([originEntityUrn, viaEntityUrn]); + const originEntity = entities.find((e) => e.urn === originEntityUrn); + const viaEntity = entities.find((e) => e.urn === viaEntityUrn); + + return { + isPropagated, + origin: { + urn: originEntityUrn, + entity: originEntity, + }, + via: { + urn: viaEntityUrn, + entity: viaEntity, + }, + }; +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldDescription.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldDescription.tsx index be95cba3ab4f07..e64a1436b0b1c5 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldDescription.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldDescription.tsx @@ -6,6 +6,8 @@ import styled from 'styled-components'; import { SectionHeader, StyledDivider } from './components'; import UpdateDescriptionModal from '../../../../../components/legacy/DescriptionModal'; import { EditableSchemaFieldInfo, SchemaField, SubResourceType } from '../../../../../../../../types.generated'; +import { getFieldDescriptionDetails } from '../../utils/getFieldDescriptionDetails'; +import PropagationDetails from '../../../../../propagation/PropagationDetails'; import DescriptionSection from '../../../../../containers/profile/sidebar/AboutSection/DescriptionSection'; import { useEntityData, useMutationUrn, useRefetch } from '../../../../../EntityContext'; import { useSchemaRefetch } from '../../SchemaContext'; @@ -13,11 +15,6 @@ import { useUpdateDescriptionMutation } from 
'../../../../../../../../graphql/mu import analytics, { EntityActionType, EventType } from '../../../../../../../analytics'; import SchemaEditableContext from '../../../../../../../shared/SchemaEditableContext'; -const DescriptionWrapper = styled.div` - display: flex; - justify-content: space-between; -`; - const EditIcon = styled(Button)` border: none; box-shadow: none; @@ -25,6 +22,13 @@ const EditIcon = styled(Button)` width: 20px; `; +const DescriptionWrapper = styled.div` + display: flex; + gap: 4px; + align-items: center; + justify-content: space-between; +`; + interface Props { expandedField: SchemaField; editableFieldInfo?: EditableSchemaFieldInfo; @@ -76,7 +80,13 @@ export default function FieldDescription({ expandedField, editableFieldInfo }: P }, }); - const displayedDescription = editableFieldInfo?.description || expandedField.description; + const { schemaFieldEntity, description } = expandedField; + const { displayedDescription, isPropagated, sourceDetail, propagatedDescription } = getFieldDescriptionDetails({ + schemaFieldEntity, + editableFieldInfo, + defaultDescription: description, + }); + const baDescription = expandedField?.schemaFieldEntity?.businessAttributes?.businessAttribute?.businessAttribute?.properties ?.description; @@ -87,12 +97,17 @@ export default function FieldDescription({ expandedField, editableFieldInfo }: P
Description - + + {isPropagated && } + {!!displayedDescription && ( + + )} +
{isSchemaEditable && ( )} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldProperties.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldProperties.tsx index 8c88cdce95f06f..689a191f469f53 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldProperties.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldProperties.tsx @@ -4,6 +4,8 @@ import { SchemaField, StdDataType } from '../../../../../../../../types.generate import { SectionHeader, StyledDivider } from './components'; import { mapStructuredPropertyValues } from '../../../../Properties/useStructuredProperties'; import StructuredPropertyValue from '../../../../Properties/StructuredPropertyValue'; +import { EditColumn } from '../../../../Properties/Edit/EditColumn'; +import { useGetEntityWithSchema } from '../../useGetEntitySchema'; const PropertyTitle = styled.div` font-size: 14px; @@ -13,6 +15,8 @@ const PropertyTitle = styled.div` const PropertyWrapper = styled.div` margin-bottom: 12px; + display: flex; + justify-content: space-between; `; const PropertiesWrapper = styled.div` @@ -29,6 +33,7 @@ interface Props { export default function FieldProperties({ expandedField }: Props) { const { schemaFieldEntity } = expandedField; + const { refetch } = useGetEntityWithSchema(true); if (!schemaFieldEntity?.structuredProperties?.properties?.length) return null; @@ -43,23 +48,33 @@ export default function FieldProperties({ expandedField }: Props) { const hasMultipleValues = valuesData.length > 1; return ( - - {structuredProp.structuredProperty.definition.displayName} - {hasMultipleValues ? ( - - {valuesData.map((value) => ( -
  • + +
    + + {structuredProp.structuredProperty.definition.displayName} + + {hasMultipleValues ? ( + + {valuesData.map((value) => ( +
  • + +
  • + ))} +
    + ) : ( + <> + {valuesData.map((value) => ( - - ))} - - ) : ( - <> - {valuesData.map((value) => ( - - ))} - - )} + ))} + + )} + + v.value) || []} + refetch={refetch} + />
    ); })} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/getFieldDescriptionDetails.ts b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/getFieldDescriptionDetails.ts new file mode 100644 index 00000000000000..6434baddb77a66 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/getFieldDescriptionDetails.ts @@ -0,0 +1,25 @@ +import { EditableSchemaFieldInfo, SchemaFieldEntity } from '../../../../../../../types.generated'; + +interface Props { + schemaFieldEntity?: SchemaFieldEntity | null; + editableFieldInfo?: EditableSchemaFieldInfo; + defaultDescription?: string | null; +} + +export function getFieldDescriptionDetails({ schemaFieldEntity, editableFieldInfo, defaultDescription }: Props) { + const documentation = schemaFieldEntity?.documentation?.documentations?.[0]; + const isUsingDocumentationAspect = !editableFieldInfo?.description && !!documentation; + const isPropagated = + isUsingDocumentationAspect && + !!documentation?.attribution?.sourceDetail?.find( + (mapEntry) => mapEntry.key === 'propagated' && mapEntry.value === 'true', + ); + + const displayedDescription = + editableFieldInfo?.description || documentation?.documentation || defaultDescription || ''; + + const sourceDetail = documentation?.attribution?.sourceDetail; + const propagatedDescription = documentation?.documentation; + + return { displayedDescription, isPropagated, sourceDetail, propagatedDescription }; +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useDescriptionRenderer.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useDescriptionRenderer.tsx index 73e6d2ca6e9b3e..bb70c2cb493037 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useDescriptionRenderer.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useDescriptionRenderer.tsx @@ -6,6 +6,7 @@ import { useUpdateDescriptionMutation } from 
'../../../../../../../graphql/mutat import { useMutationUrn, useRefetch } from '../../../../EntityContext'; import { useSchemaRefetch } from '../SchemaContext'; import { pathMatchesNewPath } from '../../../../../dataset/profile/schema/utils/utils'; +import { getFieldDescriptionDetails } from './getFieldDescriptionDetails'; export default function useDescriptionRenderer(editableSchemaMetadata: EditableSchemaMetadata | null | undefined) { const urn = useMutationUrn(); @@ -21,10 +22,16 @@ export default function useDescriptionRenderer(editableSchemaMetadata: EditableS }; return (description: string, record: SchemaField, index: number): JSX.Element => { - const relevantEditableFieldInfo = editableSchemaMetadata?.editableSchemaFieldInfo.find( - (candidateEditableFieldInfo) => pathMatchesNewPath(candidateEditableFieldInfo.fieldPath, record.fieldPath), + const editableFieldInfo = editableSchemaMetadata?.editableSchemaFieldInfo.find((candidateEditableFieldInfo) => + pathMatchesNewPath(candidateEditableFieldInfo.fieldPath, record.fieldPath), ); - const displayedDescription = relevantEditableFieldInfo?.description || description; + const { schemaFieldEntity } = record; + const { displayedDescription, isPropagated, sourceDetail } = getFieldDescriptionDetails({ + schemaFieldEntity, + editableFieldInfo, + defaultDescription: description, + }); + const sanitizedDescription = DOMPurify.sanitize(displayedDescription); const original = record.description ? 
DOMPurify.sanitize(record.description) : undefined; const businessAttributeDescription = @@ -43,7 +50,7 @@ export default function useDescriptionRenderer(editableSchemaMetadata: EditableS baExpanded={!!expandedBARows[index]} description={sanitizedDescription} original={original} - isEdited={!!relevantEditableFieldInfo?.description} + isEdited={!!editableFieldInfo?.description} onUpdate={(updatedDescription) => updateDescription({ variables: { @@ -56,6 +63,8 @@ export default function useDescriptionRenderer(editableSchemaMetadata: EditableS }, }).then(refresh) } + isPropagated={isPropagated} + sourceDetail={sourceDetail} isReadOnly /> ); diff --git a/datahub-web-react/src/app/entity/shared/tabs/Properties/Edit/EditColumn.tsx b/datahub-web-react/src/app/entity/shared/tabs/Properties/Edit/EditColumn.tsx index ac50df6a5381ed..6a0599c0cdb33d 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Properties/Edit/EditColumn.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Properties/Edit/EditColumn.tsx @@ -1,16 +1,19 @@ import { Button } from 'antd'; import React, { useState } from 'react'; -import { PropertyRow } from '../types'; import EditStructuredPropertyModal from './EditStructuredPropertyModal'; +import { StructuredPropertyEntity } from '../../../../../../types.generated'; interface Props { - propertyRow: PropertyRow; + structuredProperty?: StructuredPropertyEntity; + associatedUrn?: string; + values?: (string | number | null)[]; + refetch?: () => void; } -export function EditColumn({ propertyRow }: Props) { +export function EditColumn({ structuredProperty, associatedUrn, values, refetch }: Props) { const [isEditModalVisible, setIsEditModalVisible] = useState(false); - if (!propertyRow.structuredProperty || propertyRow.structuredProperty?.definition.immutable) { + if (!structuredProperty || structuredProperty?.definition.immutable) { return null; } @@ -21,9 +24,11 @@ export function EditColumn({ propertyRow }: Props) { setIsEditModalVisible(false)} + 
refetch={refetch} /> ); diff --git a/datahub-web-react/src/app/entity/shared/tabs/Properties/Edit/EditStructuredPropertyModal.tsx b/datahub-web-react/src/app/entity/shared/tabs/Properties/Edit/EditStructuredPropertyModal.tsx index 73a280031ebd09..c8def8bef5e195 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Properties/Edit/EditStructuredPropertyModal.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Properties/Edit/EditStructuredPropertyModal.tsx @@ -1,7 +1,6 @@ import { Button, Modal, message } from 'antd'; -import React from 'react'; +import React, { useEffect, useMemo } from 'react'; import styled from 'styled-components'; -import { PropertyRow } from '../types'; import StructuredPropertyInput from '../../../components/styled/StructuredProperty/StructuredPropertyInput'; import { PropertyValueInput, StructuredPropertyEntity } from '../../../../../../types.generated'; import { useUpsertStructuredPropertiesMutation } from '../../../../../../graphql/structuredProperties.generated'; @@ -17,19 +16,33 @@ const Description = styled.div` interface Props { isOpen: boolean; - propertyRow: PropertyRow; structuredProperty: StructuredPropertyEntity; + associatedUrn?: string; + values?: (string | number | null)[]; closeModal: () => void; + refetch?: () => void; } -export default function EditStructuredPropertyModal({ isOpen, propertyRow, structuredProperty, closeModal }: Props) { - const { refetch } = useEntityContext(); - const urn = useMutationUrn(); - const initialValues = propertyRow.values?.map((v) => v.value) || []; - const { selectedValues, selectSingleValue, toggleSelectedValue, updateSelectedValues } = +export default function EditStructuredPropertyModal({ + isOpen, + structuredProperty, + associatedUrn, + values, + closeModal, + refetch, +}: Props) { + const { refetch: entityRefetch } = useEntityContext(); + const mutationUrn = useMutationUrn(); + const urn = associatedUrn || mutationUrn; + const initialValues = useMemo(() => values || [], [values]); 
+ const { selectedValues, selectSingleValue, toggleSelectedValue, updateSelectedValues, setSelectedValues } = useEditStructuredProperty(initialValues); const [upsertStructuredProperties] = useUpsertStructuredPropertiesMutation(); + useEffect(() => { + setSelectedValues(initialValues); + }, [isOpen, initialValues, setSelectedValues]); + function upsertProperties() { message.loading('Updating...'); upsertStructuredProperties({ @@ -51,7 +64,11 @@ export default function EditStructuredPropertyModal({ isOpen, propertyRow, struc }, }) .then(() => { - refetch(); + if (refetch) { + refetch(); + } else { + entityRefetch(); + } message.destroy(); message.success('Successfully updated structured property!'); closeModal(); @@ -67,7 +84,7 @@ export default function EditStructuredPropertyModal({ isOpen, propertyRow, struc return ( { propertyTableColumns.push({ title: '', width: '10%', - render: (propertyRow: PropertyRow) => , + render: (propertyRow: PropertyRow) => ( + v.value) || []} + /> + ), } as any); } diff --git a/datahub-web-react/src/app/entity/shared/useGetEntities.ts b/datahub-web-react/src/app/entity/shared/useGetEntities.ts new file mode 100644 index 00000000000000..9391bc17d7a8a2 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/useGetEntities.ts @@ -0,0 +1,18 @@ +import { useEffect, useState } from 'react'; +import { useGetEntitiesQuery } from '../../../graphql/entity.generated'; +import { Entity } from '../../../types.generated'; + +export function useGetEntities(urns: string[]): Entity[] { + const [verifiedUrns, setVerifiedUrns] = useState([]); + + useEffect(() => { + urns.forEach((urn) => { + if (urn.startsWith('urn:li:') && !verifiedUrns.includes(urn)) { + setVerifiedUrns((prevUrns) => [...prevUrns, urn]); + } + }); + }, [urns, verifiedUrns]); + + const { data } = useGetEntitiesQuery({ variables: { urns: verifiedUrns }, skip: !verifiedUrns.length }); + return (data?.entities || []) as Entity[]; +} diff --git 
a/datahub-web-react/src/app/lineage/LineageExplorer.tsx b/datahub-web-react/src/app/lineage/LineageExplorer.tsx index 26ffaa26a6ca22..ce0c4bb8f122d4 100644 --- a/datahub-web-react/src/app/lineage/LineageExplorer.tsx +++ b/datahub-web-react/src/app/lineage/LineageExplorer.tsx @@ -221,7 +221,9 @@ export default function LineageExplorer({ urn, type }: Props) { Close {selectedEntity.type !== EntityType.Restricted && ( - )} diff --git a/datahub-web-react/src/app/search/SearchResultList.tsx b/datahub-web-react/src/app/search/SearchResultList.tsx index d85c3674cbd43d..bc0efcfa3f47e8 100644 --- a/datahub-web-react/src/app/search/SearchResultList.tsx +++ b/datahub-web-react/src/app/search/SearchResultList.tsx @@ -62,6 +62,7 @@ type Props = { selectedEntities: EntityAndType[]; setSelectedEntities: (entities: EntityAndType[]) => any; suggestions: SearchSuggestion[]; + pageNumber: number; }; export const SearchResultList = ({ @@ -73,6 +74,7 @@ export const SearchResultList = ({ selectedEntities, setSelectedEntities, suggestions, + pageNumber, }: Props) => { const entityRegistry = useEntityRegistry(); const selectedEntityUrns = selectedEntities.map((entity) => entity.urn); @@ -86,6 +88,7 @@ export const SearchResultList = ({ entityType: result.entity.type, index, total: totalResultCount, + pageNumber, }); }; diff --git a/datahub-web-react/src/app/search/SearchResults.tsx b/datahub-web-react/src/app/search/SearchResults.tsx index dafe9a20b6ab7f..e96e8fd528b9e6 100644 --- a/datahub-web-react/src/app/search/SearchResults.tsx +++ b/datahub-web-react/src/app/search/SearchResults.tsx @@ -264,6 +264,7 @@ export const SearchResults = ({ selectedEntities={selectedEntities} setSelectedEntities={setSelectedEntities} suggestions={suggestions} + pageNumber={page} /> {totalResults > 0 && ( diff --git a/datahub-web-react/src/app/settings/SettingsPage.tsx b/datahub-web-react/src/app/settings/SettingsPage.tsx index 24bcd17ca7f9c0..e3948349546efb 100644 --- 
a/datahub-web-react/src/app/settings/SettingsPage.tsx +++ b/datahub-web-react/src/app/settings/SettingsPage.tsx @@ -121,7 +121,7 @@ export const SettingsPage = () => { const showViews = isViewsEnabled || false; const showOwnershipTypes = me && me?.platformPrivileges?.manageOwnershipTypes; const showHomePagePosts = me && me?.platformPrivileges?.manageGlobalAnnouncements && !readOnlyModeEnabled; - const showFeatures = true; // TODO: Add feature flag for this + const showFeatures = me?.platformPrivileges?.manageIngestion; // TODO: Add feature flag for this return ( diff --git a/datahub-web-react/src/app/settings/features/Feature.tsx b/datahub-web-react/src/app/settings/features/Feature.tsx index 2c090aae696f88..13453cf8f73252 100644 --- a/datahub-web-react/src/app/settings/features/Feature.tsx +++ b/datahub-web-react/src/app/settings/features/Feature.tsx @@ -104,6 +104,8 @@ export interface FeatureType { title: string; description: string; isAvailable: boolean; + isDisabled: boolean; + disabledMessage?: string; checked: boolean; onChange?: (checked: boolean) => void; }>; @@ -134,22 +136,6 @@ export const Feature = ({ key, title, description, settings, options, isNew, lea - {settings.map((option) => ( - <> - - - - {option.title} - - - - - - - - ))} {options.map((option, index) => ( <> @@ -165,15 +151,34 @@ export const Feature = ({ key, title, description, settings, options, isNew, lea {option.description} - (option.onChange ? option.onChange(checked) : null)} - disabled={!option.isAvailable} - /> + + (option.onChange ? 
option.onChange(checked) : null)} + disabled={!option.isAvailable || option.isDisabled} + /> + {index !== options.length - 1 && } ))} + {settings.map((option) => ( + <> + + + + {option.title} + Only available on DataHub Cloud + + + + + + + + ))} ); diff --git a/datahub-web-react/src/app/settings/features/Features.tsx b/datahub-web-react/src/app/settings/features/Features.tsx index ee8d7c628c1eff..1d0a0bb469cf86 100644 --- a/datahub-web-react/src/app/settings/features/Features.tsx +++ b/datahub-web-react/src/app/settings/features/Features.tsx @@ -73,18 +73,23 @@ export const Features = () => { setIsColPropagateChecked(checked); updateDocPropagation(checked); }, + isDisabled: false, + disabledMessage: undefined, }, { key: uuidv4(), title: 'Asset Level Propagation', description: 'Propagate new documentation from upstream to downstream assets based on data lineage relationships.', - isAvailable: false, checked: false, + onChange: (_: boolean) => null, + isAvailable: true, + isDisabled: true, + disabledMessage: 'Coming soon!', }, ], isNew: true, - learnMoreLink: 'https://datahubproject.io/docs/automations/doc-propagation', + learnMoreLink: 'https://datahubproject.io/docs/automations/docs-propagation', }, ]; diff --git a/datahub-web-react/src/app/useAppConfig.ts b/datahub-web-react/src/app/useAppConfig.ts index 12470acdf64eef..3b7e89166c95a5 100644 --- a/datahub-web-react/src/app/useAppConfig.ts +++ b/datahub-web-react/src/app/useAppConfig.ts @@ -28,6 +28,11 @@ export function useIsAppConfigContextLoaded() { return appConfig.loaded; } +export function useIsEditableDatasetNameEnabled() { + const appConfig = useAppConfig(); + return appConfig.config.featureFlags.editableDatasetNameEnabled; +} + export function useIsShowSeparateSiblingsEnabled() { const appConfig = useAppConfig(); return appConfig.config.featureFlags.showSeparateSiblings; diff --git a/datahub-web-react/src/appConfigContext.tsx b/datahub-web-react/src/appConfigContext.tsx index 54547aaa5642e0..f119f8bc7d53af 
100644 --- a/datahub-web-react/src/appConfigContext.tsx +++ b/datahub-web-react/src/appConfigContext.tsx @@ -54,6 +54,7 @@ export const DEFAULT_APP_CONFIG = { platformBrowseV2: false, businessAttributeEntityEnabled: false, dataContractsEnabled: false, + editableDatasetNameEnabled: false, showSeparateSiblings: false, }, }; diff --git a/datahub-web-react/src/graphql/app.graphql b/datahub-web-react/src/graphql/app.graphql index e058a6fbb58e00..2a0baf238761f3 100644 --- a/datahub-web-react/src/graphql/app.graphql +++ b/datahub-web-react/src/graphql/app.graphql @@ -69,6 +69,7 @@ query appConfig { platformBrowseV2 businessAttributeEntityEnabled dataContractsEnabled + editableDatasetNameEnabled showSeparateSiblings } } diff --git a/datahub-web-react/src/graphql/browse.graphql b/datahub-web-react/src/graphql/browse.graphql index 62ae2168d857ac..dd4445947520fb 100644 --- a/datahub-web-react/src/graphql/browse.graphql +++ b/datahub-web-react/src/graphql/browse.graphql @@ -17,6 +17,7 @@ query getBrowseResults($input: BrowseInput!) 
{ description } editableProperties { + name description } platform { diff --git a/datahub-web-react/src/graphql/fragments.graphql b/datahub-web-react/src/graphql/fragments.graphql index adaa8aa2535810..e5bbb5f0dc29d5 100644 --- a/datahub-web-react/src/graphql/fragments.graphql +++ b/datahub-web-react/src/graphql/fragments.graphql @@ -251,6 +251,7 @@ fragment nonRecursiveDatasetFields on Dataset { } } editableProperties { + name description } ownership { diff --git a/datahub-web-react/src/graphql/preview.graphql b/datahub-web-react/src/graphql/preview.graphql index e675f142a1eeb5..1bee614dd7adbe 100644 --- a/datahub-web-react/src/graphql/preview.graphql +++ b/datahub-web-react/src/graphql/preview.graphql @@ -9,6 +9,7 @@ fragment entityPreview on Entity { ...platformFields } editableProperties { + name description } platformNativeType diff --git a/datahub-web-react/src/graphql/search.graphql b/datahub-web-react/src/graphql/search.graphql index 547f8351f6a2a6..38c9df0a636d0f 100644 --- a/datahub-web-react/src/graphql/search.graphql +++ b/datahub-web-react/src/graphql/search.graphql @@ -335,6 +335,7 @@ fragment nonSiblingsDatasetSearchFields on Dataset { ...dataPlatformInstanceFields } editableProperties { + name description } access { diff --git a/docker/datahub-frontend/Dockerfile b/docker/datahub-frontend/Dockerfile index 2a9354cbf6a04f..89974e56575b07 100644 --- a/docker/datahub-frontend/Dockerfile +++ b/docker/datahub-frontend/Dockerfile @@ -25,7 +25,7 @@ RUN apk --no-cache --update-cache --available upgrade \ ENV LD_LIBRARY_PATH="/lib:/lib64" -FROM base as unpack +FROM base AS unpack COPY ./datahub-frontend.zip / RUN unzip datahub-frontend.zip -d /tmp/out \ @@ -33,16 +33,16 @@ RUN unzip datahub-frontend.zip -d /tmp/out \ COPY ./docker/monitoring/client-prometheus-config.yaml /datahub-frontend/ RUN chown -R datahub:datahub /datahub-frontend && chmod 755 /datahub-frontend -FROM base as prod-install +FROM base AS prod-install COPY --from=unpack /datahub-frontend/ 
/datahub-frontend/ -FROM base as dev-install +FROM base AS dev-install # Dummy stage for development. Assumes code is built on your machine and mounted to this image. # See this excellent thread https://github.com/docker/cli/issues/1134 VOLUME [ "/datahub-frontend" ] -FROM ${APP_ENV}-install as final +FROM ${APP_ENV}-install AS final COPY --chown=datahub:datahub --chmod=755 ./docker/datahub-frontend/start.sh / USER datahub diff --git a/docker/datahub-gms/Dockerfile b/docker/datahub-gms/Dockerfile index d30dbd84930578..b15bf3c6f9f17b 100644 --- a/docker/datahub-gms/Dockerfile +++ b/docker/datahub-gms/Dockerfile @@ -11,7 +11,7 @@ FROM golang:1-alpine3.20 AS binary # Re-declaring arg from above to make it available in this stage (will inherit default value) ARG ALPINE_REPO_URL -ENV DOCKERIZE_VERSION v0.6.1 +ENV DOCKERIZE_VERSION=v0.6.1 WORKDIR /go/src/github.com/jwilder # Optionally set corporate mirror for apk @@ -52,7 +52,7 @@ COPY --from=binary /go/bin/dockerize /usr/local/bin ENV LD_LIBRARY_PATH="/lib:/lib64" -FROM base as prod-install +FROM base AS prod-install COPY war.war /datahub/datahub-gms/bin/war.war COPY metadata-models/src/main/resources/entity-registry.yml /datahub/datahub-gms/resources/entity-registry.yml COPY docker/datahub-gms/start.sh /datahub/datahub-gms/scripts/start.sh @@ -61,11 +61,11 @@ COPY docker/datahub-gms/jetty-jmx.xml /datahub/datahub-gms/scripts/jetty-jmx.xml COPY docker/monitoring/client-prometheus-config.yaml /datahub/datahub-gms/scripts/prometheus-config.yaml RUN chmod +x /datahub/datahub-gms/scripts/start.sh -FROM base as dev-install +FROM base AS dev-install # Dummy stage for development. Assumes code is built on your machine and mounted to this image. 
# See this excellent thread https://github.com/docker/cli/issues/1134 -FROM ${APP_ENV}-install as final +FROM ${APP_ENV}-install AS final RUN mkdir -p /etc/datahub/plugins/auth/resources diff --git a/docker/datahub-ingestion-base/Dockerfile b/docker/datahub-ingestion-base/Dockerfile index 8a238c32704bb6..a2686ee8b6557f 100644 --- a/docker/datahub-ingestion-base/Dockerfile +++ b/docker/datahub-ingestion-base/Dockerfile @@ -7,28 +7,13 @@ ARG GITHUB_REPO_URL=https://github.com ARG DEBIAN_REPO_URL=https://deb.debian.org/debian ARG PIP_MIRROR_URL=https://pypi.python.org/simple -FROM golang:1-alpine3.20 AS dockerize-binary +FROM powerman/dockerize:0.19 as dockerize-binary -# Re-declaring arg from above to make it available in this stage (will inherit default value) -ARG ALPINE_REPO_URL - -ENV DOCKERIZE_VERSION v0.6.1 -WORKDIR /go/src/github.com/jwilder - -# Optionally set corporate mirror for apk -RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi - -RUN apk --no-cache --update add openssl git tar curl - -WORKDIR /go/src/github.com/jwilder/dockerize - -RUN go install github.com/jwilder/dockerize@$DOCKERIZE_VERSION - -FROM python:3.10 as base +FROM python:3.10 AS base ARG GITHUB_REPO_URL -ENV DEBIAN_FRONTEND noninteractive +ENV DEBIAN_FRONTEND=noninteractive # Optionally set corporate mirror for deb ARG DEBIAN_REPO_URL @@ -56,8 +41,7 @@ RUN apt-get update && apt-get install -y -qq \ && python -m pip install --no-cache --upgrade pip uv>=0.1.10 wheel setuptools \ && rm -rf /var/lib/apt/lists/* /var/cache/apk/* -# compiled against newer golang for security fixes -COPY --from=dockerize-binary /go/bin/dockerize /usr/local/bin +COPY --from=dockerize-binary /usr/local/bin/dockerize /usr/local/bin COPY ./docker/datahub-ingestion-base/base-requirements.txt requirements.txt COPY ./docker/datahub-ingestion-base/entrypoint.sh /entrypoint.sh @@ -75,7 +59,7 @@ 
RUN python3 -m venv $VIRTUAL_ENV && \ ENTRYPOINT [ "/entrypoint.sh" ] -FROM ${BASE_IMAGE} as full-install +FROM ${BASE_IMAGE} AS full-install USER 0 RUN apt-get update && apt-get install -y -qq \ @@ -102,7 +86,7 @@ RUN if [ $(arch) = "x86_64" ]; then \ USER datahub -FROM ${BASE_IMAGE} as slim-install +FROM ${BASE_IMAGE} AS slim-install # Do nothing else on top of base FROM ${APP_ENV}-install diff --git a/docker/datahub-ingestion/Dockerfile b/docker/datahub-ingestion/Dockerfile index b8eda548491224..34ac6ae9eba584 100644 --- a/docker/datahub-ingestion/Dockerfile +++ b/docker/datahub-ingestion/Dockerfile @@ -5,7 +5,7 @@ ARG DOCKER_VERSION=head-full ARG DEBIAN_REPO_URL=https://deb.debian.org/debian ARG PIP_MIRROR_URL=https://pypi.python.org/simple -FROM $BASE_IMAGE:$DOCKER_VERSION as base +FROM $BASE_IMAGE:$DOCKER_VERSION AS base # Optionally set corporate mirror for deb USER 0 @@ -28,11 +28,11 @@ RUN sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEAS cat src/datahub/__init__.py | grep __version__ && \ cat airflow-plugin/src/datahub_airflow_plugin/__init__.py | grep __version__ -FROM base as slim-install +FROM base AS slim-install RUN uv pip install --no-cache -e ".[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,hive,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary]" -FROM base as full-install-build +FROM base AS full-install-build USER 0 RUN apt-get update && apt-get install -y -qq maven @@ -44,14 +44,14 @@ RUN uv pip install --no-cache -e ".[base,all]" "./airflow-plugin[plugin-v2]" && datahub --version RUN ./pyspark_jars.sh -FROM base as full-install +FROM base AS full-install COPY --from=full-install-build ${VIRTUAL_ENV} ${VIRTUAL_ENV} -FROM base as dev-install +FROM base AS dev-install # Dummy stage for development. Assumes code is built on your machine and mounted to this image. 
# See this excellent thread https://github.com/docker/cli/issues/1134 -FROM ${APP_ENV}-install as final +FROM ${APP_ENV}-install AS final USER datahub diff --git a/docker/datahub-mae-consumer/Dockerfile b/docker/datahub-mae-consumer/Dockerfile index 0ee55821f2579f..6edaa29ee1a8bb 100644 --- a/docker/datahub-mae-consumer/Dockerfile +++ b/docker/datahub-mae-consumer/Dockerfile @@ -11,7 +11,7 @@ FROM golang:1-alpine3.20 AS binary # Re-declaring arg from above to make it available in this stage (will inherit default value) ARG ALPINE_REPO_URL -ENV DOCKERIZE_VERSION v0.6.1 +ENV DOCKERIZE_VERSION=v0.6.1 WORKDIR /go/src/github.com/jwilder # Optionally set corporate mirror for apk @@ -47,18 +47,18 @@ COPY --from=binary /go/bin/dockerize /usr/local/bin ENV LD_LIBRARY_PATH="/lib:/lib64" -FROM base as prod-install +FROM base AS prod-install COPY mae-consumer-job.jar /datahub/datahub-mae-consumer/bin/ COPY metadata-models/src/main/resources/entity-registry.yml /datahub/datahub-mae-consumer/resources/entity-registry.yml COPY docker/datahub-mae-consumer/start.sh /datahub/datahub-mae-consumer/scripts/ COPY docker/monitoring/client-prometheus-config.yaml /datahub/datahub-mae-consumer/scripts/prometheus-config.yaml RUN chmod +x /datahub/datahub-mae-consumer/scripts/start.sh -FROM base as dev-install +FROM base AS dev-install # Dummy stage for development. Assumes code is built on your machine and mounted to this image. 
# See this excellent thread https://github.com/docker/cli/issues/1134 -FROM ${APP_ENV}-install as final +FROM ${APP_ENV}-install AS final RUN addgroup -S datahub && adduser -S datahub -G datahub USER datahub diff --git a/docker/datahub-mce-consumer/Dockerfile b/docker/datahub-mce-consumer/Dockerfile index 8f85b432a10711..1eb56633c561e6 100644 --- a/docker/datahub-mce-consumer/Dockerfile +++ b/docker/datahub-mce-consumer/Dockerfile @@ -11,7 +11,7 @@ FROM golang:1-alpine3.20 AS binary # Re-declaring arg from above to make it available in this stage (will inherit default value) ARG ALPINE_REPO_URL -ENV DOCKERIZE_VERSION v0.6.1 +ENV DOCKERIZE_VERSION=v0.6.1 WORKDIR /go/src/github.com/jwilder # Optionally set corporate mirror for apk @@ -45,7 +45,7 @@ RUN apk --no-cache --update-cache --available upgrade \ && cp /usr/lib/jvm/java-17-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks COPY --from=binary /go/bin/dockerize /usr/local/bin -FROM base as prod-install +FROM base AS prod-install COPY mce-consumer-job.jar /datahub/datahub-mce-consumer/bin/ COPY metadata-models/src/main/resources/entity-registry.yml /datahub/datahub-mce-consumer/resources/entity-registry.yml COPY docker/datahub-mce-consumer/start.sh /datahub/datahub-mce-consumer/scripts/ @@ -54,12 +54,12 @@ RUN chmod +x /datahub/datahub-mce-consumer/scripts/start.sh ENV LD_LIBRARY_PATH="/lib:/lib64" -FROM base as dev-install +FROM base AS dev-install # Dummy stage for development. Assumes code is built on your machine and mounted to this image. 
# See this excellent thread https://github.com/docker/cli/issues/1134 COPY metadata-models/src/main/resources/entity-registry.yml /datahub/datahub-mce-consumer/resources/entity-registry.yml -FROM ${APP_ENV}-install as final +FROM ${APP_ENV}-install AS final RUN addgroup -S datahub && adduser -S datahub -G datahub USER datahub diff --git a/docker/datahub-upgrade/Dockerfile b/docker/datahub-upgrade/Dockerfile index 675e24ab87109e..3d59a903414b1a 100644 --- a/docker/datahub-upgrade/Dockerfile +++ b/docker/datahub-upgrade/Dockerfile @@ -11,7 +11,7 @@ FROM golang:1-alpine3.20 AS binary # Re-declaring arg from above to make it available in this stage (will inherit default value) ARG ALPINE_REPO_URL -ENV DOCKERIZE_VERSION v0.6.1 +ENV DOCKERIZE_VERSION=v0.6.1 WORKDIR /go/src/github.com/jwilder # Optionally set corporate mirror for apk @@ -51,15 +51,15 @@ COPY --from=binary /go/bin/dockerize /usr/local/bin ENV LD_LIBRARY_PATH="/lib:/lib64" -FROM base as prod-install +FROM base AS prod-install COPY datahub-upgrade.jar /datahub/datahub-upgrade/bin/ COPY metadata-models/src/main/resources/entity-registry.yml /datahub/datahub-gms/resources/entity-registry.yml -FROM base as dev-install +FROM base AS dev-install # Dummy stage for development. Assumes code is built on your machine and mounted to this image. 
# See this excellent thread https://github.com/docker/cli/issues/1134 -FROM ${APP_ENV}-install as final +FROM ${APP_ENV}-install AS final RUN addgroup -S datahub && adduser -S datahub -G datahub USER datahub diff --git a/docker/elasticsearch-setup/Dockerfile b/docker/elasticsearch-setup/Dockerfile index 7390e3579dcf8f..4e64dcbc1e452c 100644 --- a/docker/elasticsearch-setup/Dockerfile +++ b/docker/elasticsearch-setup/Dockerfile @@ -10,7 +10,7 @@ FROM golang:1-alpine3.20 AS binary ARG ALPINE_REPO_URL -ENV DOCKERIZE_VERSION v0.6.1 +ENV DOCKERIZE_VERSION=v0.6.1 WORKDIR /go/src/github.com/jwilder # Optionally set corporate mirror for apk diff --git a/docker/kafka-setup/Dockerfile b/docker/kafka-setup/Dockerfile index a68da4e41d4df9..ad1d01c1ce97c0 100644 --- a/docker/kafka-setup/Dockerfile +++ b/docker/kafka-setup/Dockerfile @@ -6,8 +6,8 @@ ARG GITHUB_REPO_URL=https://github.com ARG MAVEN_CENTRAL_REPO_URL=https://repo1.maven.org/maven2 ARG APACHE_DOWNLOAD_URL=null -# Using as a base image because to get the needed jars for confluent utils -FROM confluentinc/cp-base-new:$KAFKA_DOCKER_VERSION as confluent_base +# Using AS a base image because to get the needed jars for confluent utils +FROM confluentinc/cp-base-new:$KAFKA_DOCKER_VERSION AS confluent_base ARG MAVEN_CENTRAL_REPO_URL ARG SNAKEYAML_VERSION="2.0" @@ -22,8 +22,8 @@ ARG ALPINE_REPO_URL ARG APACHE_DOWNLOAD_URL ARG GITHUB_REPO_URL -ENV KAFKA_VERSION 3.7.0 -ENV SCALA_VERSION 2.13 +ENV KAFKA_VERSION=3.7.0 +ENV SCALA_VERSION=2.13 LABEL name="kafka" version=${KAFKA_VERSION} @@ -44,7 +44,7 @@ RUN mkdir -p /opt \ && rm -rf /tmp/* \ && apk del --purge .build-deps -ENV PATH /sbin:/opt/kafka/bin/:$PATH +ENV PATH=/sbin:/opt/kafka/bin/:$PATH WORKDIR /opt/kafka @@ -71,6 +71,7 @@ COPY docker/kafka-setup/kafka-setup.sh ./kafka-setup.sh COPY docker/kafka-setup/kafka-config.sh ./kafka-config.sh COPY docker/kafka-setup/kafka-topic-workers.sh ./kafka-topic-workers.sh COPY docker/kafka-setup/kafka-ready.sh ./kafka-ready.sh +COPY 
docker/kafka-setup/env_to_properties.py ./env_to_properties.py RUN chmod +x ./kafka-setup.sh ./kafka-topic-workers.sh ./kafka-ready.sh diff --git a/docker/mysql-setup/Dockerfile b/docker/mysql-setup/Dockerfile index 46969352d81746..b0ca45ad8f6f24 100644 --- a/docker/mysql-setup/Dockerfile +++ b/docker/mysql-setup/Dockerfile @@ -5,7 +5,7 @@ FROM golang:1-alpine3.20 AS binary ARG ALPINE_REPO_URL -ENV DOCKERIZE_VERSION v0.6.1 +ENV DOCKERIZE_VERSION=v0.6.1 WORKDIR /go/src/github.com/jwilder # Optionally set corporate mirror for apk diff --git a/docker/postgres-setup/Dockerfile b/docker/postgres-setup/Dockerfile index 8ab211218f2406..e145456e807d4d 100644 --- a/docker/postgres-setup/Dockerfile +++ b/docker/postgres-setup/Dockerfile @@ -5,7 +5,7 @@ FROM golang:1-alpine3.20 AS binary ARG ALPINE_REPO_URL -ENV DOCKERIZE_VERSION v0.6.1 +ENV DOCKERIZE_VERSION=v0.6.1 WORKDIR /go/src/github.com/jwilder # Optionally set corporate mirror for apk diff --git a/docker/profiles/docker-compose.frontend.yml b/docker/profiles/docker-compose.frontend.yml index b43db8297cb1e0..b5b2d50143927f 100644 --- a/docker/profiles/docker-compose.frontend.yml +++ b/docker/profiles/docker-compose.frontend.yml @@ -10,6 +10,7 @@ x-datahub-frontend-service: &datahub-frontend-service - ${DATAHUB_LOCAL_FRONTEND_ENV:-empty2.env} environment: &datahub-frontend-service-env KAFKA_BOOTSTRAP_SERVER: broker:29092 + DATAHUB_GMS_HOST: ${DATAHUB_GMS_HOST:-datahub-gms} volumes: - ${HOME}/.datahub/plugins:/etc/datahub/plugins diff --git a/docker/profiles/docker-compose.gms.yml b/docker/profiles/docker-compose.gms.yml index 8cfff2280e2fea..c9448fa34c6870 100644 --- a/docker/profiles/docker-compose.gms.yml +++ b/docker/profiles/docker-compose.gms.yml @@ -40,6 +40,7 @@ x-kafka-env: &kafka-env # KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 SCHEMA_REGISTRY_TYPE: INTERNAL KAFKA_SCHEMAREGISTRY_URL: http://datahub-gms:8080/schema-registry/api/ + SPRING_KAFKA_CONSUMER_AUTO_OFFSET_RESET: 
${SPRING_KAFKA_CONSUMER_AUTO_OFFSET_RESET:-earliest} x-datahub-quickstart-telemetry-env: &datahub-quickstart-telemetry-env DATAHUB_SERVER_TYPE: ${DATAHUB_SERVER_TYPE:-quickstart} diff --git a/docker/profiles/docker-compose.prerequisites.yml b/docker/profiles/docker-compose.prerequisites.yml index 7cd9c9039539cc..eed23a749628fe 100644 --- a/docker/profiles/docker-compose.prerequisites.yml +++ b/docker/profiles/docker-compose.prerequisites.yml @@ -234,7 +234,7 @@ services: env_file: kafka-broker/env/docker.env environment: KAFKA_NODE_ID: 1 - KAFKA_ADVERTISED_LISTENERS: BROKER://broker:29092,EXTERNAL://broker:9092 + KAFKA_ADVERTISED_LISTENERS: BROKER://broker:29092,EXTERNAL://localhost:9092 KAFKA_LISTENERS: BROKER://broker:29092,EXTERNAL://broker:9092,CONTROLLER://broker:39092 KAFKA_INTER_BROKER_LISTENER_NAME: BROKER KAFKA_CONTROLLER_LISTENER_NAMES: CONTROLLER diff --git a/docs-website/adoptionStoriesIndexes.json b/docs-website/adoptionStoriesIndexes.json new file mode 100644 index 00000000000000..d54dd6bcfa4f0a --- /dev/null +++ b/docs-website/adoptionStoriesIndexes.json @@ -0,0 +1,341 @@ +{ + "companies": [ + { + "name": "Netflix", + "slug": "netflix", + "imageUrl": "/img/logos/companies/netflix.png", + "imageSize": "large", + "link": "https://blog.datahubproject.io/how-netflix-is-collaborating-with-datahub-to-enhance-its-extensibility-a34d33f45947", + "linkType": "blog", + "tagline": "How Netflix is collaborating with DataHub to enhance its extensibility", + "category": "B2B & B2C", + "description": "\"DataHub gave us the extensibility features we needed to define new entity types easily and augment existing ones. DataHub performed exceptionally well in managing our traffic load and data volume. It offers a great developer experience, a well-documented taxonomy, and — very importantly — solid community support.\"

    — Ajoy Majumdar, Software Architect at Netflix

    " + }, + { + "name": "Visa", + "slug": "visa", + "imageUrl": "/img/logos/companies/visa.png", + "imageSize": "large", + "link": "https://blog.datahubproject.io/how-visa-uses-datahub-to-scale-data-governance-cace052d61c5", + "linkType": "blog", + "tagline": "How Visa uses DataHub to scale data governance", + "category": "Financial & Fintech", + "description": "\"We found DataHub to provide excellent coverage for our needs. What we appreciate most about DataHub is its powerful API platform.\"

    — Jean-Pierre Dijcks, Sr. Dir. Product Management at VISA

    " + }, + { + "name": "Optum", + "slug": "optum", + "imageUrl": "/img/logos/companies/optum.jpg", + "imageSize": "medium", + "link": "https://opensource.optum.com/blog/2022/03/23/data-mesh-via-datahub", + "linkType": "blog", + "tagline": "Data Mesh via DataHub", + "category": "And More", + "description": "“DataHub’s event driven architecture provides us a mechanism to act on any metadata changes in real time. This allows us to perform various actions like provisioning access to a data product, notifying consumers on any schema changes that may affect their application or triggering data movement jobs to move data from source to sink platforms.”" + }, + { + "name": "Pinterest", + "slug": "pinterest", + "imageUrl": "/img/logos/companies/pinterest.png", + "imageSize": "small", + "link": "https://www.youtube.com/watch?v=YoxTg8tQSwg&feature=youtu.be", + "linkType": "blog", + "tagline": "DataHub Project at Pinterest", + "category": "B2B & B2C", + "description": "Pinterest adopted a DataHub project to enhance metadata management for its big data query platform, facilitating better data navigation and understanding." + }, + { + "name": "Airtel", + "slug": "airtel", + "imageUrl": "/img/logos/companies/airtel.png", + "imageSize": "large", + "link": "https://www.youtube.com/watch?v=yr24mM91BN4", + "linkType": "video", + "tagline": "A transformative journey to Airtel's data mesh architecture with DataHub", + "category": "B2B & B2C", + "description": "Airtel is a leading global telecommunication provider. DataHub is the bedrock of Data Mesh at Airtel by providing the requisite governance and metadata management functionality to ensure their Data Products are discoverable, addressable, trustworthy, self-describing, and secure.

    Get a closer look at how the Airtel team has successfully integrated DataHub to take their data mesh implementation to the next level." + }, + { + "name": "Coursera", + "slug": "coursera", + "imageUrl": "/img/logos/companies/coursera.svg", + "imageSize": "small", + "link": "https://www.youtube.com/watch?v=bd5v4fn4d4s", + "linkType": "video", + "tagline": "Coursera's DataHub Journey", + "category": "B2B & B2C", + "description": "“DataHub aligns with our needs [for] data documentation, a unified search experience, lineage information, and additional metadata. We are also very impressed with the vibrant and supportive community.”" + }, + { + "name": "Zynga", + "slug": "zynga", + "imageUrl": "/img/logos/companies/zynga.png", + "imageSize": "default", + "link": "https://www.youtube.com/watch?v=VCU3-Hd_glI", + "linkType": "video", + "tagline": "Zynga's DataHub Implementation", + "category": "B2B & B2C", + "description": "“We looked around for data catalog tool, and DataHub was a clear winner.”

    Zynga levels up data management using DataHub, highlighting its role in enhancing data management, tracing data lineage, and ensuring data quality." + }, + { + "name": "Chime", + "slug": "chime", + "imageUrl": "/img/logos/companies/chime.png", + "imageSize": "default", + "link": "https://www.youtube.com/watch?v=GktS-XJhK30", + "linkType": "video", + "tagline": "A Story of schema, contracts, and data discovery", + "category": "Financial & Fintech", + "description": "“At Chime, DataHub serves as our guide for navigating the complexities of data cataloging and discovery.”

    Chime uses Datahub (DataHub Cloud) for ingesting metadata from diverse infrastructure components like Snowflake, Looker, Terraform, and so many others." + }, + { + "name": "Checkout.com", + "slug": "checkout-com", + "imageUrl": "/img/logos/companies/checkout-com.svg", + "imageSize": "small", + "link": "https://www.youtube.com/watch?v=emkpKO0bTkI", + "linkType": "video", + "tagline": "Self-Serve Data Governance with DataHub Action Framework", + "category": "Financial & Fintech", + "description": "Discover how Checkout leverage DataHub for advanced data management and compliance, especially in managing sensitive data types." + }, + { + "name": "MediaMarkt Saturn", + "slug": "mediamarkt-saturn", + "imageUrl": "/img/logos/companies/mediamarkt-saturn.png", + "imageSize": "large", + "link": "https://www.youtube.com/watch?v=wsCFnElN_Wo", + "linkType": "video", + "tagline": "DataHub + MediaMarktSaturn Access Management Journey", + "category": "B2B & B2C", + "description": "Europe’s #1 consumer electronics retailer implemented DataHub for three reasons:

    1. DataHub provides an extremely flexible and customizable metadata platform at scale.
    2. Open-source means lower cost to implement and removes the headache of license management.
    3. Community-driven project which continually evolves with industry trends and best practices." + }, + { + "name": "Adevinta", + "slug": "adevinta", + "imageUrl": "/img/logos/companies/adevinta.png", + "imageSize": "medium", + "link": "https://medium.com/@adevinta/building-the-data-catalogue-the-beginning-of-a-journey-d64e828f955c", + "linkType": "blog", + "tagline": "Building the data catalogue", + "category": "E-Commerce", + "description": "“DataHub allows us to solve the data discovery problem, which was a big challenge in our organization, and now we are solving it.”" + }, + { + "name": "Wolt", + "slug": "wolt", + "imageUrl": "/img/logos/companies/wolt.png", + "imageSize": "default", + "link": "https://blog.datahubproject.io/humans-of-datahub-fredrik-sannholm-d673b1877f2b", + "linkType": "blog", + "tagline": "Wolt's DataHub Integration", + "category": "E-Commerce", + "description": "“[DataHub] has made our legal team very happy with being able to keep track of our sensitive data [to answer questions like] Where’s it going? How’s it being processed? Where’s it ending up? Which third party tool or API’s are we sending it to and why? Who is responsible for this integration?”" + }, + { + "name": "Geotab", + "slug": "geotab", + "imageUrl": "/img/logos/companies/geotab.jpg", + "imageSize": "small", + "link": "https://www.youtube.com/watch?v=boyjT2OrlU4", + "linkType": "video", + "tagline": "Geotab's Experience with DataHub", + "category": "B2B & B2C", + "description": "“The key evaluation metric for selecting DataHub was the approachability and technical capabilities of its leading development team.”

    Geotab’s data adoption journey explores challenges in data management, governance, and the decision to utilize DataHub for improved productivity and collaboration." + }, + { + "name": "Hurb", + "slug": "hurb", + "imageUrl": "/img/logos/companies/hurb.png", + "imageSize": "medium", + "link": "https://blog.datahubproject.io/humans-of-datahub-patrick-franco-braz-b02b55a4c5384", + "linkType": "blog", + "tagline": "Hurb's DataHub Journey", + "category": "B2B & B2C", + "description": "“The main points that drove our decision to implement DataHub were its user-friendly interface, active and receptive community, contribution opportunities, and built-in ingestion sources for our primary services.”

    Hurb implemented DataHub to enhance data governance, streamline data access, and improve decision-making through a structured integration process." + }, + { + "name": "Grofers", + "slug": "grofers", + "imageUrl": "/img/logos/companies/grofers.png", + "imageSize": "medium", + "link": "https://www.youtube.com/watch?v=m9kUYAuezFI", + "linkType": "video", + "tagline": "Grofers' Success with DataHub", + "category": "E-Commerce", + "description": "Grofers provides a closer look into how their team has leveraged DataHub as the source of truth for data governance." + }, + { + "name": "Viasat", + "slug": "viasat", + "imageUrl": "/img/logos/companies/viasat.png", + "imageSize": "medium", + "link": "https://www.youtube.com/watch?v=2SrDAJnzkjE", + "linkType": "video", + "tagline": "Viasat's DataHub Implementation", + "category": "And More", + "description": "Viasat highlights why they chose DataHub over other open source and commercial technologies and their plans with it." + }, + { + "name": "LinkedIn", + "slug": "linkedin", + "imageUrl": "/img/logos/companies/linkedin.svg", + "imageSize": "medium", + "category": "B2B & B2C" + }, + { + "name": "Udemy", + "slug": "udemy", + "imageUrl": "/img/logos/companies/udemy.png", + "imageSize": "medium", + "category": "B2B & B2C" + }, + { + "name": "ThoughtWorks", + "slug": "thoughtworks", + "imageUrl": "/img/logos/companies/thoughtworks.png", + "imageSize": "medium", + "category": "B2B & B2C" + }, + { + "name": "Expedia Group", + "slug": "expedia-group", + "imageUrl": "/img/logos/companies/expedia.svg", + "imageSize": "medium", + "category": "B2B & B2C" + }, + { + "name": "Typeform", + "slug": "typeform", + "imageUrl": "/img/logos/companies/typeform.svg", + "imageSize": "medium", + "category": "B2B & B2C" + }, + { + "name": "Peloton", + "slug": "peloton", + "imageUrl": "/img/logos/companies/peloton.png", + "imageSize": "default", + "category": "B2B & B2C" + }, + { + "name": "Razer", + "slug": "razer", + "imageUrl": 
"/img/logos/companies/razer.jpeg", + "imageSize": "large", + "category": "B2B & B2C" + }, + { + "name": "ClassDojo", + "slug": "classdojo", + "imageUrl": "/img/logos/companies/classdojo.png", + "imageSize": "medium", + "category": "B2B & B2C" + }, + { + "name": "Klarna", + "slug": "klarna", + "imageUrl": "/img/logos/companies/klarna.svg", + "imageSize": "medium", + "category": "Financial & Fintech" + }, + { + "name": "N26", + "slug": "n26", + "imageUrl": "/img/logos/companies/n26.svg", + "imageSize": "medium", + "category": "Financial & Fintech" + }, + { + "name": "BankSalad", + "slug": "banksalad", + "imageUrl": "/img/logos/companies/banksalad.png", + "imageSize": "default", + "category": "Financial & Fintech" + }, + { + "name": "Uphold", + "slug": "uphold", + "imageUrl": "/img/logos/companies/uphold.png", + "imageSize": "default", + "category": "Financial & Fintech" + }, + { + "name": "Stash", + "slug": "stash", + "imageUrl": "/img/logos/companies/stash.svg", + "imageSize": "medium", + "category": "Financial & Fintech" + }, + { + "name": "SumUp", + "slug": "sumup", + "imageUrl": "/img/logos/companies/sumup.png", + "imageSize": "medium", + "category": "Financial & Fintech" + }, + { + "name": "VanMoof", + "slug": "vanmoof", + "imageUrl": "/img/logos/companies/vanmoof.png", + "imageSize": "small", + "category": "E-Commerce" + }, + { + "name": "SpotHero", + "slug": "spothero", + "imageUrl": "/img/logos/companies/spothero.png", + "imageSize": "default", + "category": "E-Commerce" + }, + { + "name": "hipages", + "slug": "hipages", + "imageUrl": "/img/logos/companies/hipages.png", + "imageSize": "medium", + "category": "E-Commerce" + }, + { + "name": "Showroomprive.com", + "slug": "showroomprive-com", + "imageUrl": "/img/logos/companies/showroomprive.png", + "imageSize": "small", + "category": "E-Commerce" + }, + { + "name": "Wikimedia Foundation", + "slug": "wikimedia-foundation", + "imageUrl": "/img/logos/companies/wikimedia-foundation.png", + "imageSize": "medium", + 
"category": "And More" + }, + { + "name": "Cabify", + "slug": "cabify", + "imageUrl": "/img/logos/companies/cabify.png", + "imageSize": "medium", + "category": "And More" + }, + { + "name": "Digital Turbine", + "slug": "digital-turbine", + "imageUrl": "/img/logos/companies/digitalturbine.svg", + "imageSize": "medium", + "category": "And More" + }, + { + "name": "DFDS", + "slug": "dfds", + "imageUrl": "/img/logos/companies/dfds.png", + "imageSize": "medium", + "category": "And More" + }, + { + "name": "Moloco", + "slug": "moloco", + "imageUrl": "/img/logos/companies/moloco.png", + "imageSize": "medium", + "category": "And More" + } + ] +} \ No newline at end of file diff --git a/docs-website/docusaurus.config.js b/docs-website/docusaurus.config.js index ab033f7b04e4ba..3b2019f785c1e2 100644 --- a/docs-website/docusaurus.config.js +++ b/docs-website/docusaurus.config.js @@ -147,8 +147,8 @@ module.exports = { label: "YouTube", }, { - href: "https://www.youtube.com/playlist?list=PLdCtLs64vZvGCKMQC2dJEZ6cUqWsREbFi", - label: "Case Studies", + href: "/adoption-stories", + label: "Adoption Stories", }, { href: "https://www.youtube.com/playlist?list=PLdCtLs64vZvErAXMiqUYH9e63wyDaMBgg", @@ -170,6 +170,14 @@ module.exports = { value: '', }, { + value: ` + 0.14.0 + + + `, + type: "html", + }, + { value: ` 0.13.0 diff --git a/docs-website/package.json b/docs-website/package.json index 62d12888323036..58820fbf42b21b 100644 --- a/docs-website/package.json +++ b/docs-website/package.json @@ -48,6 +48,7 @@ "react-dom": "18.2.0", "sass": "^1.43.2", "swc-loader": "^0.2.6", + "swiper": "^11.1.4", "uuid": "^9.0.0" }, "browserslist": { diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index d435f00902d771..1f9c0a4d79a9d8 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -31,7 +31,11 @@ module.exports = { label: "Demo", href: "https://demo.datahubproject.io/", }, - "docs/what-is-datahub/customer-stories", + { + type: "link", + label: "Adoption 
Stories", + href: "/adoption-stories", + }, "docs/what-is-datahub/datahub-concepts", ], }, @@ -94,6 +98,23 @@ module.exports = { }, ], }, + { + label: "Automations", + type: "category", + items: [ + { + label: "Documentation Propagation", + type: "doc", + id: "docs/automation/docs-propagation", + }, + { + label: "Snowflake Tag Sync", + type: "doc", + id: "docs/automation/snowflake-tag-propagation", + className: "saasOnly", + }, + ], + }, { label: "Business Attributes", type: "doc", @@ -523,7 +544,12 @@ module.exports = { "Advanced Guides": [ "docs/how/delete-metadata", "docs/how/configuring-authorization-with-apache-ranger", - "docs/managed-datahub/configuring-identity-provisioning-with-ms-entra", + { + "SCIM Provisioning": [ + "docs/managed-datahub/configuring-identity-provisioning-with-ms-entra", + "docs/managed-datahub/configuring-identity-provisioning-with-okta", + ], + }, "docs/how/backup-datahub", "docs/how/restore-indices", "docs/advanced/db-retention", @@ -920,6 +946,7 @@ module.exports = { // "docs/_api-guide-template" // - "metadata-service/services/README" // "metadata-ingestion/examples/structured_properties/README" + // "smoke-test/tests/openapi/README" // ], ], }; diff --git a/docs-website/src/pages/_components/CardCTAs/cardCTAs.module.scss b/docs-website/src/pages/_components/CardCTAs/cardCTAs.module.scss deleted file mode 100644 index fcd3666d03ddc9..00000000000000 --- a/docs-website/src/pages/_components/CardCTAs/cardCTAs.module.scss +++ /dev/null @@ -1,24 +0,0 @@ -.flexCol { - display: flex; -} - -.ctaCard { - flex-direction: row; - align-items: flex-start; - justify-content: space-between; - row-gap: 1rem; - padding: 1rem; - &:hover { - text-decoration: none; - border: 1px solid var(--ifm-color-primary); - background-color: var(--ifm-background-surface-color); - } - margin-bottom: 1rem; - flex: 1; -} - -.ctaHeading { - margin-bottom: 0; - display: flex; - align-items: center; -} diff --git a/docs-website/src/pages/_components/CardCTAs/index.js 
b/docs-website/src/pages/_components/CardCTAs/index.js deleted file mode 100644 index dc1b148d24bcd2..00000000000000 --- a/docs-website/src/pages/_components/CardCTAs/index.js +++ /dev/null @@ -1,52 +0,0 @@ -import React from "react"; -import clsx from "clsx"; -import styles from "./cardCTAs.module.scss"; -import useBaseUrl from "@docusaurus/useBaseUrl"; -import { ArrowRightOutlined } from "@ant-design/icons"; - -const cardsContent = [ - { - label: "Data Mesh", - title: "Data Products, Delivered", - url: "https://www.acryldata.io/blog/data-products-in-datahub-everything-you-need-to-know?utm_source=datahub&utm_medium=referral&utm_content=blog", - }, - { - label: "Data Contracts", - title: "Data Contracts: End-to-end Reliability in Data", - url: "https://www.acryldata.io/blog/data-contracts-in-datahub-combining-verifiability-with-holistic-data-management?utm_source=datahub&utm_medium=referral&utm_content=blog", - }, - { - label: "Shift Left", - title: "Data Governance and Lineage Impact Analysis", - url: "https://www.acryldata.io/blog/the-3-must-haves-of-metadata-management-part-2?utm_source=datahub&utm_medium=referral&utm_content=blog", - }, -]; - -const Card = ({ label, title, url }) => { - return ( - - ); -}; - -const CardCTAs = () => - cardsContent?.length > 0 ? ( -
    -
    -
    - {cardsContent.map((props, idx) => ( - - ))} -
    -
    -
    - ) : null; - -export default CardCTAs; diff --git a/docs-website/src/pages/_components/Hero/hero.module.scss b/docs-website/src/pages/_components/Hero/hero.module.scss index 6e4a623f469d51..97bdceaef69366 100644 --- a/docs-website/src/pages/_components/Hero/hero.module.scss +++ b/docs-website/src/pages/_components/Hero/hero.module.scss @@ -42,58 +42,3 @@ } } } - -.quickLinks { - display: flex; - align-items: center; - justify-content: space-between; - padding: 1rem; - font-weight: bold; - margin-bottom: -2.5vh; - @media (min-width: 768px) { - flex-direction: row; - } - - > * { - padding: 0.5rem 1rem; - display: inline-block; - - @media (min-width: 768px) { - padding: 0 1rem; - } - } -} - -.quickLinksLabel { - display: flex; - align-items: center; - svg { - width: 24px; - height: 24px; - color: var(--ifm-text-color) !important; - margin-right: 0.5rem; - } -} - -.quickstartContent { - text-align: center; - padding: 2rem 0; - height: 100%; - margin: 2rem 0; - background: #34394d; - border-radius: var(--ifm-card-border-radius); -} - -.quickstartTitle { - color: #fafafa; -} - -.quickstartSubtitle { - font-size: 1.1rem; - color: gray; -} - -.quickstartCodeblock { - text-align: left; - padding: 0 20vh; -} diff --git a/docs-website/src/pages/_components/Hero/index.js b/docs-website/src/pages/_components/Hero/index.js index 17e5d0e7f4966a..12e41e2ecd1766 100644 --- a/docs-website/src/pages/_components/Hero/index.js +++ b/docs-website/src/pages/_components/Hero/index.js @@ -7,8 +7,8 @@ import { useColorMode } from "@docusaurus/theme-common"; import { QuestionCircleOutlined } from "@ant-design/icons"; import styles from "./hero.module.scss"; import CodeBlock from "@theme/CodeBlock"; -import CardCTAs from "../CardCTAs"; import TownhallButton from "../TownhallButton"; +import { Section } from "../Section"; const HeroAnnouncement = ({ message, linkUrl, linkText }) => (
    @@ -50,33 +50,6 @@ const Hero = ({}) => {
    - -
    -

    Get Started Now

    -

    Run the following command to get started with DataHub.

    -
    - - python3 -m pip install --upgrade pip wheel setuptools
    - python3 -m pip install --upgrade acryl-datahub
    - datahub docker quickstart -
    -
    - - DataHub Quickstart Guide - - - Deploying With Kubernetes - -
    -
    -
    - - Learn -
    - What is DataHub? - How is DataHub architected? - See DataHub in action -
    ); diff --git a/docs-website/src/pages/_components/Logos/index.js b/docs-website/src/pages/_components/Logos/index.js index 3243617bcc40d6..b17c072d02d575 100644 --- a/docs-website/src/pages/_components/Logos/index.js +++ b/docs-website/src/pages/_components/Logos/index.js @@ -1,204 +1,15 @@ -import React from "react"; import clsx from "clsx"; -import Tabs from "@theme/Tabs"; -import TabItem from "@theme/TabItem"; import Link from "@docusaurus/Link"; import useBaseUrl from "@docusaurus/useBaseUrl"; - +import React from "react"; +import { Swiper, SwiperSlide } from "swiper/react"; +import "swiper/css"; +import "swiper/css/pagination"; +import { Pagination } from "swiper/modules"; import styles from "./logos.module.scss"; +const companyIndexes = require("../../../../adoptionStoriesIndexes.json"); +const companies = companyIndexes.companies; -const companiesByIndustry = [ - { - name: "B2B & B2C", - companies: [ - { - name: "LinkedIn", - imageUrl: "/img/logos/companies/linkedin.svg", - imageSize: "medium", - }, - { - name: "Udemy", - imageUrl: "/img/logos/companies/udemy.png", - imageSize: "medium", - }, - { - name: "Airtel", - imageUrl: "/img/logos/companies/airtel.png", - imageSize: "large", - }, - { - name: "Coursera", - imageUrl: "/img/logos/companies/coursera.svg", - imageSize: "small", - }, - { - name: "Geotab", - imageUrl: "/img/logos/companies/geotab.jpg", - imageSize: "small", - }, - { - name: "ThoughtWorks", - imageUrl: "/img/logos/companies/thoughtworks.png", - imageSize: "medium", - }, - { - name: "Expedia Group", - imageUrl: "/img/logos/companies/expedia.svg", - imageSize: "medium", - }, - { - name: "Typeform", - imageUrl: "/img/logos/companies/typeform.svg", - imageSize: "medium", - }, - { - name: "Peloton", - imageUrl: "/img/logos/companies/peloton.png", - imageSize: "default", - }, - { - name: "Zynga", - imageUrl: "/img/logos/companies/zynga.png", - imageSize: "default", - }, - { - name: "Hurb", - imageUrl: "/img/logos/companies/hurb.png", - 
imageSize: "medium", - }, - { - name: "Razer", - imageUrl: "/img/logos/companies/razer.jpeg", - imageSize: "large", - }, - { - name: "ClassDojo", - imageUrl: "/img/logos/companies/classdojo.png", - imageSize: "medium", - }, - ], - }, - { - name: "Financial & Fintech", - companies: [ - { - name: "Saxo Bank", - imageUrl: "/img/logos/companies/saxobank.svg", - imageSize: "default", - }, - { - name: "Klarna", - imageUrl: "/img/logos/companies/klarna.svg", - imageSize: "medium", - }, - { - name: "N26", - imageUrl: "/img/logos/companies/n26.svg", - imageSize: "medium", - }, - { - name: "BankSalad", - imageUrl: "/img/logos/companies/banksalad.png", - imageSize: "default", - }, - { - name: "Uphold", - imageUrl: "/img/logos/companies/uphold.png", - imageSize: "default", - }, - { - name: "Stash", - imageUrl: "/img/logos/companies/stash.svg", - imageSize: "medium", - }, - { - name: "SumUp", - imageUrl: "/img/logos/companies/sumup.png", - imageSize: "medium", - }, - ], - }, - { - name: "E-Commerce", - companies: [ - { - name: "Adevinta", - imageUrl: "/img/logos/companies/adevinta.png", - imageSize: "medium", - }, - { - name: "VanMoof", - imageUrl: "/img/logos/companies/vanmoof.png", - imageSize: "small", - }, - { - name: "Grofers", - imageUrl: "/img/logos/companies/grofers.png", - imageSize: "medium", - }, - { - name: "SpotHero", - imageUrl: "/img/logos/companies/spothero.png", - imageSize: "default", - }, - { - name: "hipages", - imageUrl: "/img/logos/companies/hipages.png", - imageSize: "medium", - }, - { - name: "Wolt", - imageUrl: "/img/logos/companies/wolt.png", - imageSize: "default", - }, - { - name: "Showroomprive.com", - imageUrl: "/img/logos/companies/showroomprive.png", - imageSize: "small", - }, - ], - }, - { - name: "And More", - companies: [ - { - name: "Wikimedia Foundation", - imageUrl: "/img/logos/companies/wikimedia-foundation.png", - imageSize: "medium", - }, - { - name: "Cabify", - imageUrl: "/img/logos/companies/cabify.png", - imageSize: "medium", - }, - { 
- name: "Digital Turbine", - imageUrl: "/img/logos/companies/digitalturbine.svg", - imageSize: "medium", - }, - { - name: "Viasat", - imageUrl: "/img/logos/companies/viasat.png", - imageSize: "medium", - }, - { - name: "DFDS", - imageUrl: "/img/logos/companies/dfds.png", - imageSize: "medium", - }, - { - name: "Moloco", - imageUrl: "/img/logos/companies/moloco.png", - imageSize: "medium", - }, - { - name: "Optum", - imageUrl: "/img/logos/companies/optum.jpg", - imageSize: "medium", - }, - ], - }, -]; const platformLogos = [ { @@ -315,10 +126,19 @@ const platformLogos = [ ]; export const PlatformLogos = () => ( - +
    {[...platformLogos, ...platformLogos].map((logo, idx) => ( - {logo.name} + {logo.name} ))}
    @@ -326,22 +146,58 @@ export const PlatformLogos = () => ( export const CompanyLogos = () => (
    - - {companiesByIndustry.map((industry, idx) => ( - -
    - {industry.companies.map((company, idx) => ( + + {companies + .filter((company) => company.imageUrl) // Filter companies with imageUrl + .map((company, idx) => ( + + {company.link ? ( + + {company.name} + + ) : ( {company.name} - ))} -
    -
    - ))} -
    + )} + + ))} +
    ); diff --git a/docs-website/src/pages/_components/Logos/logos.module.scss b/docs-website/src/pages/_components/Logos/logos.module.scss index b20cc9a48b247c..fd331bccb45563 100644 --- a/docs-website/src/pages/_components/Logos/logos.module.scss +++ b/docs-website/src/pages/_components/Logos/logos.module.scss @@ -1,3 +1,7 @@ +.pillTabs { + justify-content: center; +} + .marquee { width: 100%; overflow: hidden; @@ -35,7 +39,6 @@ } .companyWrapper { - background: #fff; display: flex; flex-wrap: wrap; align-items: center; @@ -45,16 +48,25 @@ filter: invert(1); mix-blend-mode: exclusion; } + + :global { + .swiper-wrapper { + display: flex; + align-items: center; + margin-bottom: 1rem; + .swiper-slide { + display: flex; + justify-content: center; + align-items: center; + height: 100%; + } + } + } } .companyLogoContainer { - display: flex; - align-items: center; justify-content: center; > div { - display: flex; - flex-direction: column; - align-items: center; ul[role="tablist"] { padding: 0 1rem; overflow-x: auto; @@ -79,23 +91,35 @@ } } +.companyLogoWithLink { + &:hover { + opacity: 1; + filter: grayscale(0%); + } +} + .companyLogo { - width: auto; + flex-shrink: 0; + width: 100%; + height: auto; mix-blend-mode: luminosity; opacity: 0.66; - margin: 2.5rem; - height: 60px; + filter: grayscale(100%); &.default { - height: 60px; + padding: 30px; } &.large { - height: 100px; + padding: 5px; } &.medium { - height: 30px; + padding: 15px; } &.small { - height: 20px; + padding: 10px; } } + +.swiper-pagination { + margin-top: 1rem; +} \ No newline at end of file diff --git a/docs-website/src/pages/_components/QuickstartContent/index.js b/docs-website/src/pages/_components/QuickstartContent/index.js new file mode 100644 index 00000000000000..8c942a6a2e440b --- /dev/null +++ b/docs-website/src/pages/_components/QuickstartContent/index.js @@ -0,0 +1,50 @@ +import React from "react"; +import clsx from "clsx"; +import Link from "@docusaurus/Link"; +import useBaseUrl from 
"@docusaurus/useBaseUrl"; +import Image from "@theme/IdealImage"; +import { useColorMode } from "@docusaurus/theme-common"; +import { QuestionCircleOutlined } from "@ant-design/icons"; +import styles from "./quickstartcontent.module.scss"; +import CodeBlock from "@theme/CodeBlock"; +import TownhallButton from "../TownhallButton"; +import { Section } from "../Section"; + + +const QuickstartContent = ({}) => { + const { colorMode } = useColorMode(); + return ( +
    +
    +
    +

    Get Started Now

    +

    Run the following command to get started with DataHub.

    +
    + + python3 -m pip install --upgrade pip wheel setuptools
    + python3 -m pip install --upgrade acryl-datahub
    + datahub docker quickstart +
    +
    + + DataHub Quickstart Guide + + + Deploying With Kubernetes + +
    +
    +
    + + Learn +
    + What is DataHub? + How is DataHub architected? + See DataHub in action +
    +
    +
    + ); +}; + +export default QuickstartContent; diff --git a/docs-website/src/pages/_components/QuickstartContent/quickstartcontent.module.scss b/docs-website/src/pages/_components/QuickstartContent/quickstartcontent.module.scss new file mode 100644 index 00000000000000..e1badca6d2e348 --- /dev/null +++ b/docs-website/src/pages/_components/QuickstartContent/quickstartcontent.module.scss @@ -0,0 +1,67 @@ +.container { + margin-bottom: 2rem; +} + +.button { + text-decoration: none; + margin: 0.5rem 0 0 0; + white-space: nowrap; + @media (min-width: 690px) { + margin: 0 0 0 0.5rem; + } +} + +.quickstartContent { + text-align: center; + padding: 2rem 0; + height: 100%; + margin: 2rem 0; + background: #34394d; + border-radius: var(--ifm-card-border-radius); +} + +.quickstartTitle { + color: #fafafa; +} + +.quickstartSubtitle { + font-size: 1.1rem; + color: gray; +} + +.quickstartCodeblock { + text-align: left; + padding: 0 20vh; +} + +.quickLinks { + display: flex; + align-items: center; + justify-content: space-between; + padding: 1rem; + font-weight: bold; + margin-bottom: -2.5vh; + @media (min-width: 768px) { + flex-direction: row; + } + + > * { + padding: 0.5rem 1rem; + display: inline-block; + + @media (min-width: 768px) { + padding: 0 1rem; + } + } +} + +.quickLinksLabel { + display: flex; + align-items: center; + svg { + width: 24px; + height: 24px; + color: var(--ifm-text-color) !important; + margin-right: 0.5rem; + } +} diff --git a/docs-website/src/pages/_components/Quotes/index.js b/docs-website/src/pages/_components/Quotes/index.js index b66a04c2c6538e..664eba0b120519 100644 --- a/docs-website/src/pages/_components/Quotes/index.js +++ b/docs-website/src/pages/_components/Quotes/index.js @@ -32,7 +32,7 @@ const quotesContent = [ const Quote = ({ quote, company }) => { return ( -
    +
    {quote}
    diff --git a/docs-website/src/pages/_components/Quotes/quotes.module.scss b/docs-website/src/pages/_components/Quotes/quotes.module.scss index 59573fa7a597a2..3bd895c4e3b243 100644 --- a/docs-website/src/pages/_components/Quotes/quotes.module.scss +++ b/docs-website/src/pages/_components/Quotes/quotes.module.scss @@ -8,6 +8,7 @@ .companyLogoWrapper { background: #fff; + height: 100px; html[data-theme="dark"] & { filter: invert(1); mix-blend-mode: exclusion; diff --git a/docs-website/src/pages/adoption-stories/_components/LearnItemCard/index.jsx b/docs-website/src/pages/adoption-stories/_components/LearnItemCard/index.jsx new file mode 100644 index 00000000000000..67b94788d97800 --- /dev/null +++ b/docs-website/src/pages/adoption-stories/_components/LearnItemCard/index.jsx @@ -0,0 +1,25 @@ +import React from "react"; +import clsx from "clsx"; +import Link from "@docusaurus/Link"; +import styles from "./styles.module.scss"; + +const LearnItemCard = React.forwardRef(({ company, isSelected }, ref) => { + return ( +
    +
    +
    + {company.name} +
    +
    +
    +
    + + Discover {company.name}'s Story + +
    +
    +
    + ); +}); + +export default LearnItemCard; diff --git a/docs-website/src/pages/adoption-stories/_components/LearnItemCard/styles.module.scss b/docs-website/src/pages/adoption-stories/_components/LearnItemCard/styles.module.scss new file mode 100644 index 00000000000000..881e90a7d09763 --- /dev/null +++ b/docs-website/src/pages/adoption-stories/_components/LearnItemCard/styles.module.scss @@ -0,0 +1,66 @@ +.featureCol { + display: flex; +} + +.card_date { + padding: 1rem 2rem; + font-size: 0.8rem; + font-style: italic; + color: gray; + margin-top: auto; +} + +.card_feature { + font-size: 2rem; + font-weight: 700; +} + +.card_button { + padding: 1rem; + text-align: center; +} + +.card { + color: var(--ifm-text-color); + text-decoration: none !important; + padding: 0rem; + margin-bottom: 2rem; + align-self: stretch; + flex-grow: 1; + &:hover { + opacity: 0.9; + } + + &.selected { + border-color: var(--ifm-color-primary); + box-shadow: rgba(0, 0, 0, 0.35) 0px 5px 15px; + scroll-margin-top: 100px; + } + + + hr { + margin: 0; + } +} + +.featureHeader { + h2 { + margin-bottom: 1rem !important; + font-size: 1.25rem; + } + padding: 1rem 2rem; +} + +.featureBody { + padding: 0 2rem; +} + +.card_image { + margin: 0; + margin-bottom: 0.5rem; + + img { + width: 100%; + height: auto; + } +} \ No newline at end of file diff --git a/docs-website/src/pages/adoption-stories/index.jsx b/docs-website/src/pages/adoption-stories/index.jsx new file mode 100644 index 00000000000000..27f4b876af20a6 --- /dev/null +++ b/docs-website/src/pages/adoption-stories/index.jsx @@ -0,0 +1,83 @@ +import React, { useState, useEffect, useRef } from "react"; +import Layout from "@theme/Layout"; +import BrowserOnly from "@docusaurus/BrowserOnly"; +import LearnItemCard from "./_components/LearnItemCard"; +import styles from "./styles.module.scss"; + +import customerStoriesIndexes from "../../../adoptionStoriesIndexes.json"; + +function AdoptionStoriesListPageContent() { + const companies = 
(customerStoriesIndexes?.companies || []).filter((company) => company.link); + const [activeFilters, setActiveFilters] = useState([]); + const categories = ["B2B & B2C", "E-Commerce", "Financial & Fintech", "And More"]; + const selectedCardRef = useRef(null); + + const filteredItems = activeFilters.length + ? companies.filter((company) => activeFilters.includes(company.category)) + : companies; + + const handleFilterToggle = (category) => { + if (activeFilters.includes(category)) { + setActiveFilters(activeFilters.filter((filter) => filter !== category)); + } else { + setActiveFilters([...new Set([...activeFilters, category])]); + } + }; + + useEffect(() => { + const selectedSlug = window.location.hash.substring(1); + if (selectedCardRef.current) { + selectedCardRef.current.scrollIntoView({ behavior: "smooth", block: "start", inline: "nearest" }); + } + }, [selectedCardRef]); + + return ( + +
    +
    +
    +
    +

    DataHub Adoption Stories

    +

    Learn how the best data and AI teams are using DataHub +
    + Check out more stories on the DataHub Youtube. +

    +
    +
    +
    + For: + {categories.map((category) => ( + + ))} +
    +
    +
    +
    +
    + {filteredItems.map((company) => ( + + ))} +
    +
    +
    + ); +} + +export default function AdoptionStoriesListPage() { + return ( + + {() => } + + ); +} diff --git a/docs-website/src/pages/adoption-stories/styles.module.scss b/docs-website/src/pages/adoption-stories/styles.module.scss new file mode 100644 index 00000000000000..d08b48a011de07 --- /dev/null +++ b/docs-website/src/pages/adoption-stories/styles.module.scss @@ -0,0 +1,7 @@ +.filterBar { + display: flex; + justify-content: center; + align-items: center; + gap: 10px; + flex-wrap: wrap; +} \ No newline at end of file diff --git a/docs-website/src/pages/cloud/CompanyLogos/logos.module.scss b/docs-website/src/pages/cloud/CompanyLogos/logos.module.scss index a6a9dba9d8d41f..de404627cf0394 100644 --- a/docs-website/src/pages/cloud/CompanyLogos/logos.module.scss +++ b/docs-website/src/pages/cloud/CompanyLogos/logos.module.scss @@ -21,6 +21,7 @@ .scrollingCustomers { position: relative; overflow: hidden; + opacity: 0.5; } @@ -29,7 +30,7 @@ padding: 1.25rem 0; position: relative; align-items: center; - animation: scrollingCustomerAnimate 15s linear infinite; + animation: scrollingCustomerAnimate 60s linear infinite; } .scrollingCustomers__inner img { @@ -46,7 +47,7 @@ .animateScrollingCustomers { display: flex; - animation: scrollingCustomerAnimate 15s linear infinite; + animation: scrollingCustomerAnimate 60s linear infinite; } @media (max-width: 767px) { diff --git a/docs-website/src/pages/cloud/index.js b/docs-website/src/pages/cloud/index.js index 5166d80bf3b7b0..00437c8a7640a7 100644 --- a/docs-website/src/pages/cloud/index.js +++ b/docs-website/src/pages/cloud/index.js @@ -51,7 +51,7 @@ function Home() {
    -
    +
    @@ -73,12 +73,13 @@ function Home() { Product Tour -
    +
    + {/*
    An extension of the DataHub Core project.
    View Cloud Docs. -
    +
    */}
    diff --git a/docs-website/src/pages/cloud/styles.module.scss b/docs-website/src/pages/cloud/styles.module.scss index d1ac31f3ef8cc0..b805063750dd4b 100644 --- a/docs-website/src/pages/cloud/styles.module.scss +++ b/docs-website/src/pages/cloud/styles.module.scss @@ -10,6 +10,7 @@ } .hero { + margin-top: 80px; :global { .button { margin-right: 1rem; diff --git a/docs-website/src/pages/docs/_components/CustomerCard/customercard.module.scss b/docs-website/src/pages/docs/_components/CustomerCard/customercard.module.scss deleted file mode 100644 index 349f705d25b10d..00000000000000 --- a/docs-website/src/pages/docs/_components/CustomerCard/customercard.module.scss +++ /dev/null @@ -1,56 +0,0 @@ -.card { - color: var(--ifm-hero-text-color); - padding: 0; - margin: 0rem 3rem 2rem 0rem; - text-decoration: none !important; - - .card_button { - padding: 0rem 0rem 0rem 1rem; - text-align: right; - } - - .card_img { - justify-content: center; - display: flex; - height: 250px; - margin: 0; - position: relative; - text-align: center; - } - - .card_body { - padding: 2rem 3rem 2rem 3rem; - - .card_description { - min-height: 20rem; - } - } - - .card_overlay_text { - position: absolute; - text-align: left; - width: 80%; - top: 50%; - left: 50%; - transform: translate(-50%, -50%); - color: white; - - .card_customer { - font-size: 3.2rem; - font-weight: 800; - line-height: 1.2; - - } - .card_title { - font-size: 1.2rem; - font-weight: 600; - } - - } - - img { - object-fit: cover; - filter: brightness(50%); - } - -} diff --git a/docs-website/src/pages/docs/_components/CustomerCard/index.jsx b/docs-website/src/pages/docs/_components/CustomerCard/index.jsx deleted file mode 100644 index 36c83226e1f732..00000000000000 --- a/docs-website/src/pages/docs/_components/CustomerCard/index.jsx +++ /dev/null @@ -1,30 +0,0 @@ -import React from "react"; -import clsx from "clsx"; -import styles from "./customercard.module.scss"; -import Link from "@docusaurus/Link"; - -const CustomerCard = ({ 
customer, title, imgUrl, description, to,}) => { - return ( -
    -
    -
    - {customer} -
    -
    {customer}
    -
    {title}
    -
    -
    -
    -

    {description}

    -
    - - Discover {customer}'s Story - -
    -
    -
    -
    - ); -}; - -export default CustomerCard; diff --git a/docs-website/src/pages/docs/_components/CustomerCardSection/index.jsx b/docs-website/src/pages/docs/_components/CustomerCardSection/index.jsx index 27067cb3930ebc..505a2810c9433c 100644 --- a/docs-website/src/pages/docs/_components/CustomerCardSection/index.jsx +++ b/docs-website/src/pages/docs/_components/CustomerCardSection/index.jsx @@ -93,4 +93,4 @@ const CustomerCardSection = () => { ); }; -export default CustomerCardSection; +export default CustomerCardSection; \ No newline at end of file diff --git a/docs-website/src/pages/index.js b/docs-website/src/pages/index.js index 68b177d10f7aff..2eed41b4ad1bd3 100644 --- a/docs-website/src/pages/index.js +++ b/docs-website/src/pages/index.js @@ -3,13 +3,14 @@ import Layout from "@theme/Layout"; import Link from "@docusaurus/Link"; import useDocusaurusContext from "@docusaurus/useDocusaurusContext"; import CodeBlock from "@theme/CodeBlock"; - +import useBaseUrl from "@docusaurus/useBaseUrl"; import Hero from "./_components/Hero"; import Features from "./_components/Features"; -import Quotes from "./_components/Quotes"; import { Section, PromoSection } from "./_components/Section"; -import { PlatformLogos, CompanyLogos } from "./_components/Logos"; +import { PlatformLogos } from "./_components/Logos"; import RoundedImage from "./_components/RoundedImage"; +import { CompanyLogos } from "./_components/Logos"; +import QuickstartContent from "./_components/QuickstartContent"; const example_recipe = ` source: @@ -38,6 +39,18 @@ function Home() { description="DataHub is a data discovery application built on an extensible data catalog that helps you tame the complexity of diverse data ecosystems." > +
    + +
    + + Check Out Adoption Stories → + +
    +
    +
    @@ -157,10 +170,6 @@ function Home() {
    -
    - - -
    ) : null; } diff --git a/docs-website/static/img/adoption-stories/adoption-stories-adevinta.png b/docs-website/static/img/adoption-stories/adoption-stories-adevinta.png new file mode 100644 index 00000000000000..6c790995843c54 Binary files /dev/null and b/docs-website/static/img/adoption-stories/adoption-stories-adevinta.png differ diff --git a/docs-website/static/img/adoption-stories/adoption-stories-airtel.png b/docs-website/static/img/adoption-stories/adoption-stories-airtel.png new file mode 100644 index 00000000000000..ae5ebdedd47aa1 Binary files /dev/null and b/docs-website/static/img/adoption-stories/adoption-stories-airtel.png differ diff --git a/docs-website/static/img/adoption-stories/adoption-stories-checkout-com.png b/docs-website/static/img/adoption-stories/adoption-stories-checkout-com.png new file mode 100644 index 00000000000000..f6b574e1c37910 Binary files /dev/null and b/docs-website/static/img/adoption-stories/adoption-stories-checkout-com.png differ diff --git a/docs-website/static/img/adoption-stories/adoption-stories-chime.png b/docs-website/static/img/adoption-stories/adoption-stories-chime.png new file mode 100644 index 00000000000000..4c17b1628a36f6 Binary files /dev/null and b/docs-website/static/img/adoption-stories/adoption-stories-chime.png differ diff --git a/docs-website/static/img/adoption-stories/adoption-stories-coursera.png b/docs-website/static/img/adoption-stories/adoption-stories-coursera.png new file mode 100644 index 00000000000000..4f473874d0dc26 Binary files /dev/null and b/docs-website/static/img/adoption-stories/adoption-stories-coursera.png differ diff --git a/docs-website/static/img/adoption-stories/adoption-stories-geotab.png b/docs-website/static/img/adoption-stories/adoption-stories-geotab.png new file mode 100644 index 00000000000000..2b3c8a158273a9 Binary files /dev/null and b/docs-website/static/img/adoption-stories/adoption-stories-geotab.png differ diff --git 
a/docs-website/static/img/adoption-stories/adoption-stories-grofers.png b/docs-website/static/img/adoption-stories/adoption-stories-grofers.png new file mode 100644 index 00000000000000..51af8a3ad69d7b Binary files /dev/null and b/docs-website/static/img/adoption-stories/adoption-stories-grofers.png differ diff --git a/docs-website/static/img/adoption-stories/adoption-stories-hurb.png b/docs-website/static/img/adoption-stories/adoption-stories-hurb.png new file mode 100644 index 00000000000000..b7b8bae5d8c321 Binary files /dev/null and b/docs-website/static/img/adoption-stories/adoption-stories-hurb.png differ diff --git a/docs-website/static/img/adoption-stories/adoption-stories-mediamarkt-saturn.png b/docs-website/static/img/adoption-stories/adoption-stories-mediamarkt-saturn.png new file mode 100644 index 00000000000000..ac2f524a7a0e77 Binary files /dev/null and b/docs-website/static/img/adoption-stories/adoption-stories-mediamarkt-saturn.png differ diff --git a/docs-website/static/img/adoption-stories/adoption-stories-netflix.png b/docs-website/static/img/adoption-stories/adoption-stories-netflix.png new file mode 100644 index 00000000000000..de65a4c59419b5 Binary files /dev/null and b/docs-website/static/img/adoption-stories/adoption-stories-netflix.png differ diff --git a/docs-website/static/img/adoption-stories/adoption-stories-optum.png b/docs-website/static/img/adoption-stories/adoption-stories-optum.png new file mode 100644 index 00000000000000..051abaa96a0e01 Binary files /dev/null and b/docs-website/static/img/adoption-stories/adoption-stories-optum.png differ diff --git a/docs-website/static/img/adoption-stories/adoption-stories-pinterest.png b/docs-website/static/img/adoption-stories/adoption-stories-pinterest.png new file mode 100644 index 00000000000000..e005ea6d5750aa Binary files /dev/null and b/docs-website/static/img/adoption-stories/adoption-stories-pinterest.png differ diff --git 
a/docs-website/static/img/adoption-stories/adoption-stories-saxo-bank.png b/docs-website/static/img/adoption-stories/adoption-stories-saxo-bank.png new file mode 100644 index 00000000000000..333003d146cf5e Binary files /dev/null and b/docs-website/static/img/adoption-stories/adoption-stories-saxo-bank.png differ diff --git a/docs-website/static/img/adoption-stories/adoption-stories-viasat.png b/docs-website/static/img/adoption-stories/adoption-stories-viasat.png new file mode 100644 index 00000000000000..b6f633450296c6 Binary files /dev/null and b/docs-website/static/img/adoption-stories/adoption-stories-viasat.png differ diff --git a/docs-website/static/img/adoption-stories/adoption-stories-visa.png b/docs-website/static/img/adoption-stories/adoption-stories-visa.png new file mode 100644 index 00000000000000..11d732faf85fec Binary files /dev/null and b/docs-website/static/img/adoption-stories/adoption-stories-visa.png differ diff --git a/docs-website/static/img/adoption-stories/adoption-stories-wolt.png b/docs-website/static/img/adoption-stories/adoption-stories-wolt.png new file mode 100644 index 00000000000000..43501a1f2f6d57 Binary files /dev/null and b/docs-website/static/img/adoption-stories/adoption-stories-wolt.png differ diff --git a/docs-website/static/img/adoption-stories/adoption-stories-zynga.png b/docs-website/static/img/adoption-stories/adoption-stories-zynga.png new file mode 100644 index 00000000000000..94ee9e9b2fb8ee Binary files /dev/null and b/docs-website/static/img/adoption-stories/adoption-stories-zynga.png differ diff --git a/docs-website/static/img/adoption-stories/img.png b/docs-website/static/img/adoption-stories/img.png new file mode 100644 index 00000000000000..4d4971018c3982 Binary files /dev/null and b/docs-website/static/img/adoption-stories/img.png differ diff --git a/docs-website/static/img/assets/business.jpg b/docs-website/static/img/assets/business.jpg deleted file mode 100644 index f5a91928ee2ad8..00000000000000 Binary files 
a/docs-website/static/img/assets/business.jpg and /dev/null differ diff --git a/docs-website/static/img/assets/netflix.jpg b/docs-website/static/img/assets/netflix.jpg deleted file mode 100644 index 8b555f5b63187f..00000000000000 Binary files a/docs-website/static/img/assets/netflix.jpg and /dev/null differ diff --git a/docs-website/static/img/assets/phonecall.jpg b/docs-website/static/img/assets/phonecall.jpg deleted file mode 100644 index 87e48f28213827..00000000000000 Binary files a/docs-website/static/img/assets/phonecall.jpg and /dev/null differ diff --git a/docs-website/static/img/assets/travel.jpg b/docs-website/static/img/assets/travel.jpg deleted file mode 100644 index de2697f5631217..00000000000000 Binary files a/docs-website/static/img/assets/travel.jpg and /dev/null differ diff --git a/docs-website/static/img/logos/companies/checkout-com.svg b/docs-website/static/img/logos/companies/checkout-com.svg new file mode 100644 index 00000000000000..1eae8d3dbd4067 --- /dev/null +++ b/docs-website/static/img/logos/companies/checkout-com.svg @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs-website/static/img/logos/companies/chime.png b/docs-website/static/img/logos/companies/chime.png new file mode 100644 index 00000000000000..c94f9742eb6d55 Binary files /dev/null and b/docs-website/static/img/logos/companies/chime.png differ diff --git a/docs-website/static/img/logos/companies/pinterest.png b/docs-website/static/img/logos/companies/pinterest.png new file mode 100644 index 00000000000000..715c8c33fd85b4 Binary files /dev/null and b/docs-website/static/img/logos/companies/pinterest.png differ diff --git a/docs-website/static/img/logos/scrollingCompanies/saxo_bank.webp b/docs-website/static/img/logos/scrollingCompanies/saxo_bank.webp deleted file mode 100644 index a4c1aae73fe48b..00000000000000 Binary files a/docs-website/static/img/logos/scrollingCompanies/saxo_bank.webp and /dev/null differ diff --git a/docs-website/versions.json 
b/docs-website/versions.json index afd30a317c618b..5288c42437c779 100644 --- a/docs-website/versions.json +++ b/docs-website/versions.json @@ -1,3 +1,4 @@ [ + "0.14.0", "0.13.1" ] diff --git a/docs-website/yarn.lock b/docs-website/yarn.lock index a93b0e74c327db..0970a59cbc00a3 100644 --- a/docs-website/yarn.lock +++ b/docs-website/yarn.lock @@ -1827,7 +1827,7 @@ "@docusaurus/theme-search-algolia" "2.4.3" "@docusaurus/types" "2.4.3" -"@docusaurus/react-loadable@5.5.2": +"@docusaurus/react-loadable@5.5.2", "react-loadable@npm:@docusaurus/react-loadable@5.5.2": version "5.5.2" resolved "https://registry.yarnpkg.com/@docusaurus/react-loadable/-/react-loadable-5.5.2.tgz#81aae0db81ecafbdaee3651f12804580868fa6ce" integrity sha512-A3dYjdBGuy0IGT+wyLIGIKLRE+sAk1iNk0f1HjNDysO7u8lhL4N3VEm+FAubmJbAztn94F7MxBTPmnixbiyFdQ== @@ -9705,14 +9705,6 @@ react-loadable-ssr-addon-v5-slorber@^1.0.1: dependencies: "@babel/runtime" "^7.10.3" -"react-loadable@npm:@docusaurus/react-loadable@5.5.2": - version "5.5.2" - resolved "https://registry.yarnpkg.com/@docusaurus/react-loadable/-/react-loadable-5.5.2.tgz#81aae0db81ecafbdaee3651f12804580868fa6ce" - integrity sha512-A3dYjdBGuy0IGT+wyLIGIKLRE+sAk1iNk0f1HjNDysO7u8lhL4N3VEm+FAubmJbAztn94F7MxBTPmnixbiyFdQ== - dependencies: - "@types/react" "*" - prop-types "^15.6.2" - react-markdown@^8.0.6: version "8.0.7" resolved "https://registry.yarnpkg.com/react-markdown/-/react-markdown-8.0.7.tgz#c8dbd1b9ba5f1c5e7e5f2a44de465a3caafdf89b" @@ -10866,6 +10858,11 @@ swc-loader@^0.2.6: dependencies: "@swc/counter" "^0.1.3" +swiper@^11.1.4: + version "11.1.4" + resolved "https://registry.yarnpkg.com/swiper/-/swiper-11.1.4.tgz#2f8e303e8bf9e5bc40a3885fc637ae60ff27996c" + integrity sha512-1n7kbYJB2dFEpUHRFszq7gys/ofIBrMNibwTiMvPHwneKND/t9kImnHt6CfGPScMHgI+dWMbGTycCKGMoOO1KA== + symbol-observable@^1.0.4: version "1.2.0" resolved "https://registry.yarnpkg.com/symbol-observable/-/symbol-observable-1.2.0.tgz#c22688aed4eab3cdc2dfeacbb561660560a00804" diff --git 
a/docs/api/tutorials/forms.md b/docs/api/tutorials/forms.md index 3f28353595be72..eb555910f18eb7 100644 --- a/docs/api/tutorials/forms.md +++ b/docs/api/tutorials/forms.md @@ -9,16 +9,16 @@ Documentation Forms are a way for end-users to fill out all mandatory attributes Learn more about forms in the [Documentation Forms Feature Guide](../../../docs/features/feature-guides/documentation-forms.md). - ### Goal Of This Guide -This guide will show you how to create and read forms. +This guide will show you how to +- Create, Update, Read, and Delete a form +- Assign and Remove a form from entities ## Prerequisites For this tutorial, you need to deploy DataHub Quickstart and ingest sample data. For detailed information, please refer to [Datahub Quickstart Guide](/docs/quickstart.md). - @@ -29,14 +29,45 @@ Connect to your instance via [init](https://datahubproject.io/docs/cli/#init): 2. Set the server to your sandbox instance, `https://{your-instance-address}/gms` 3. Set the token to your access token - - ## Create a Form + + +```graphql +mutation createForm { + createForm( + input: { + id: "metadataInitiative2024", + name: "Metadata Initiative 2024", + description: "How we want to ensure the most important data assets in our organization have all of the most important and expected pieces of metadata filled out", + type: VERIFICATION, + prompts: [ + { + id: "123", + title: "retentionTime", + description: "Apply Retention Time structured property to form", + type: STRUCTURED_PROPERTY, + structuredPropertyParams: { + urn: "urn:li:structuredProperty:retentionTime" + } + } + ], + actors: { + users: ["urn:li:corpuser:jane@email.com", "urn:li:corpuser:john@email.com"], + groups: ["urn:li:corpGroup:team@email.com"] + } + } + ) { + urn + } +} +``` + + Create a yaml file representing the forms you’d like to load. 
@@ -111,8 +142,42 @@ If successful, you should see `Created form urn:li:form:...` -## Read Property Definition +## Update Form + + + +```graphql +mutation updateForm { + updateForm( + input: { + urn: "urn:li:form:metadataInitiative2024", + name: "Metadata Initiative 2024", + description: "How we want to ensure the most important data assets in our organization have all of the most important and expected pieces of metadata filled out", + type: VERIFICATION, + promptsToAdd: [ + { + id: "456", + title: "deprecationDate", + description: "Deprecation date for dataset", + type: STRUCTURED_PROPERTY, + structuredPropertyParams: { + urn: "urn:li:structuredProperty:deprecationDate" + } + } + ] + promptsToRemove: ["123"] + } + ) { + urn + } +} +``` + + + + +## Read Property Definition @@ -146,3 +211,60 @@ If successful, you should see metadata about your form returned like below. + +## Delete Form + + + + +```graphql +mutation deleteForm { + deleteForm( + input: { + urn: "urn:li:form:metadataInitiative2024" + } + ) +} +``` + + + +## Assign Form to Entities + +For assigning a form to a given list of entities: + + + + +```graphql +mutation batchAssignForm { + batchAssignForm( + input: { + formUrn: "urn:li:form:myform", + entityUrns: ["urn:li:dataset:mydataset1", "urn:li:dataset:mydataset2"] + } + ) +} +``` + + + +## Remove Form from Entities + +For removing a form from a given list of entities: + + + + +```graphql +mutation batchRemoveForm { + batchRemoveForm( + input: { + formUrn: "urn:li:form:myform", + entityUrns: ["urn:li:dataset:mydataset1", "urn:li:dataset:mydataset2"] + } + ) +} +``` + + diff --git a/docs/api/tutorials/structured-properties.md b/docs/api/tutorials/structured-properties.md index c56a2848638fc2..00e992f2bd0bbf 100644 --- a/docs/api/tutorials/structured-properties.md +++ b/docs/api/tutorials/structured-properties.md @@ -56,7 +56,33 @@ Requirements for OpenAPI are: The following code will create a structured property `io.acryl.privacy.retentionTime`. 
- + +```graphql +mutation createStructuredProperty { + createStructuredProperty( + input: { + id: "retentionTime", + qualifiedName:"retentionTime", + displayName: "Retention Time", + description: "Retention Time is used to figure out how long to retain records in a dataset", + valueType: "urn:li:dataType:number", + allowedValues: [ + {numberValue: 30, description: "30 days, usually reserved for datasets that are ephemeral and contain pii"}, + {numberValue: 90, description:"Use this for datasets that drive monthly reporting but contain pii"}, + {numberValue: 365, description:"Use this for non-sensitive data that can be retained for longer"} + ], + cardinality: SINGLE, + entityTypes: ["urn:li:entityType:dataset", "urn:li:entityType:dataFlow"], + } + ) { + urn + } +} +``` + + + Create a yaml file representing the properties you’d like to load. For example, below file represents a property `io.acryl.privacy.retentionTime`. You can see the full example [here](https://github.com/datahub-project/datahub/blob/example-yaml-sp/metadata-ingestion/examples/structured_properties/struct_props.yaml). 
@@ -132,29 +158,37 @@ curl -X 'POST' -v \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ + "value": { "qualifiedName": "io.acryl.privacy.retentionTime", - "valueType": "urn:li:dataType:datahub.number", - "description": "Retention Time is used to figure out how long to retain records in a dataset", - "displayName": "Retention Time", - "cardinality": "MULTIPLE", - "entityTypes": [ - "urn:li:entityType:datahub.dataset", - "urn:li:entityType:datahub.dataFlow" - ], - "allowedValues": [ - { - "value": {"double": 30}, - "description": "30 days, usually reserved for datasets that are ephemeral and contain pii" - }, - { - "value": {"double": 60}, - "description": "Use this for datasets that drive monthly reporting but contain pii" - }, - { - "value": {"double": 365}, - "description": "Use this for non-sensitive data that can be retained for longer" - } - ] + "valueType": "urn:li:dataType:datahub.number", + "description": "Retention Time is used to figure out how long to retain records in a dataset", + "displayName": "Retention Time", + "cardinality": "MULTIPLE", + "entityTypes": [ + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.dataFlow" + ], + "allowedValues": [ + { + "value": { + "double": 30 + }, + "description": "30 days, usually reserved for datasets that are ephemeral and contain pii" + }, + { + "value": { + "double": 60 + }, + "description": "Use this for datasets that drive monthly reporting but contain pii" + }, + { + "value": { + "double": 365 + }, + "description": "Use this for non-sensitive data that can be retained for longer" + } + ] + } }' | jq ``` @@ -355,7 +389,37 @@ Example Response: This action will set/replace all structured properties on the entity. See PATCH operations to add/remove a single property. 
- + + +```graphql +mutation upsertStructuredProperties { + upsertStructuredProperties( + input: { + assetUrn: "urn:li:mydataset1", + structuredPropertyInputParams: [ + { + structuredPropertyUrn: "urn:li:structuredProperty:mystructuredproperty", + values: [ + { + stringValue: "123" + } + ] + } + ] + } + ) { + properties { + structuredProperty { + urn + } + } + } +} + +``` + + + You can set structured properties to a dataset by creating a dataset yaml file with structured properties. For example, below is a dataset yaml file with structured properties in both the field and dataset level. @@ -418,14 +482,16 @@ curl -X 'POST' -v \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ - "properties": [ - { - "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", - "values": [ - {"double": 60.0} - ] - } - ] + "value": { + "properties": [ + { + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", + "values": [ + {"double": 60.0} + ] + } + ] + } }' | jq ``` Example Response: @@ -466,6 +532,31 @@ Or you can run the following command to view the properties associated with the datahub dataset get --urn {urn} ``` +## Remove Structured Properties From a Dataset + +For removing a structured property or list of structured properties from a dataset: + + + + +```graphql +mutation removeStructuredProperties { + removeStructuredProperties( + input: { + assetUrn: "urn:li:mydataset1", + structuredPropertyUrns: ["urn:li:structuredProperty:mystructuredproperty"] + } + ) { + properties { + structuredProperty {urn} + } + } +} +``` + + + + ## Patch Structured Property Value This section will show you how to patch a structured property value - either by removing, adding, or upserting a single property. 
@@ -546,23 +637,25 @@ curl -X 'POST' -v \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ - "qualifiedName": "io.acryl.privacy.retentionTime02", - "displayName": "Retention Time 02", - "valueType": "urn:li:dataType:datahub.string", - "allowedValues": [ - { - "value": {"string": "foo2"}, - "description": "test foo2 value" - }, - { - "value": {"string": "bar2"}, - "description": "test bar2 value" - } - ], - "cardinality": "SINGLE", - "entityTypes": [ - "urn:li:entityType:datahub.dataset" - ] + "value": { + "qualifiedName": "io.acryl.privacy.retentionTime02", + "displayName": "Retention Time 02", + "valueType": "urn:li:dataType:datahub.string", + "allowedValues": [ + { + "value": {"string": "foo2"}, + "description": "test foo2 value" + }, + { + "value": {"string": "bar2"}, + "description": "test bar2 value" + } + ], + "cardinality": "SINGLE", + "entityTypes": [ + "urn:li:entityType:datahub.dataset" + ] + } }' | jq ``` @@ -605,24 +698,26 @@ Specically, this will set `io.acryl.privacy.retentionTime` as `60.0` and `io.acr ```shell curl -X 'POST' -v \ - 'http://localhost:8080/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2CSampleHiveDataset%2CPROD%29/structuredProperties' \ + 'http://localhost:8080/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2CSampleHiveDataset%2CPROD%29/structuredProperties?createIfNotExists=false' \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ - "properties": [ - { - "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", - "values": [ - {"double": 60.0} - ] - }, - { - "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime02", - "values": [ - {"string": "bar2"} - ] - } - ] + "value": { + "properties": [ + { + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", + "values": [ + {"double": 60.0} + ] + }, + { + "propertyUrn": 
"urn:li:structuredProperty:io.acryl.privacy.retentionTime02", + "values": [ + {"string": "bar2"} + ] + } + ] + } }' | jq ``` @@ -780,6 +875,38 @@ You can see that the first property has been removed and the second property is In this example, we'll add the property back with a different value, preserving the existing property. + + +```graphql +mutation updateStructuredProperty { + updateStructuredProperty( + input: { + urn: "urn:li:structuredProperty:retentionTime", + displayName: "Retention Time", + description: "Retention Time is used to figure out how long to retain records in a dataset", + newAllowedValues: [ + { + numberValue: 30, + description: "30 days, usually reserved for datasets that are ephemeral and contain pii" + }, + { + numberValue: 90, + description: "Use this for datasets that drive monthly reporting but contain pii" + }, + { + numberValue: 365, + description: "Use this for non-sensitive data that can be retained for longer" + } + ] + } + ) { + urn + } +} + +``` + + ```shell @@ -998,7 +1125,9 @@ curl -X 'POST' \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ -"removed": true + "value": { + "removed": true + } }' | jq ``` @@ -1019,11 +1148,13 @@ If you want to **remove the soft delete**, you can do so by either hard deleting ```shell curl -X 'POST' \ - 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/status?systemMetadata=false' \ + 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/status?systemMetadata=false&createIfNotExists=false' \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ -"removed": false + "value": { + "removed": false + } }' | jq ``` @@ -1158,34 +1289,42 @@ Change the cardinality to `SINGLE` and add a `version`. 
```shell curl -X 'POST' -v \ - 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/propertyDefinition' \ + 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/propertyDefinition?createIfNotExists=false' \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ + "value": { "qualifiedName": "io.acryl.privacy.retentionTime", - "valueType": "urn:li:dataType:datahub.number", - "description": "Retention Time is used to figure out how long to retain records in a dataset", - "displayName": "Retention Time", - "cardinality": "SINGLE", - "version": "20240614080000", - "entityTypes": [ - "urn:li:entityType:datahub.dataset", - "urn:li:entityType:datahub.dataFlow" - ], - "allowedValues": [ - { - "value": {"double": 30}, - "description": "30 days, usually reserved for datasets that are ephemeral and contain pii" - }, - { - "value": {"double": 60}, - "description": "Use this for datasets that drive monthly reporting but contain pii" - }, - { - "value": {"double": 365}, - "description": "Use this for non-sensitive data that can be retained for longer" - } - ] + "valueType": "urn:li:dataType:datahub.number", + "description": "Retention Time is used to figure out how long to retain records in a dataset", + "displayName": "Retention Time", + "cardinality": "SINGLE", + "version": "20240614080000", + "entityTypes": [ + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.dataFlow" + ], + "allowedValues": [ + { + "value": { + "double": 30 + }, + "description": "30 days, usually reserved for datasets that are ephemeral and contain pii" + }, + { + "value": { + "double": 60 + }, + "description": "Use this for datasets that drive monthly reporting but contain pii" + }, + { + "value": { + "double": 365 + }, + "description": "Use this for non-sensitive data that can be retained for longer" + } + ] + } }' | jq ``` diff 
--git a/docs/authorization/access-policies-guide.md b/docs/authorization/access-policies-guide.md index 2040d7ff79e99e..0f741a95282bd9 100644 --- a/docs/authorization/access-policies-guide.md +++ b/docs/authorization/access-policies-guide.md @@ -15,7 +15,9 @@ There are 2 types of Access Policy within DataHub:

    -**Platform** Policies determine who has platform-level Privileges on DataHub. These include: +## Platform + +Platform Policies determine who has platform-level Privileges on DataHub. These include: - Managing Users & Groups - Viewing the DataHub Analytics Page @@ -31,7 +33,9 @@ A few Platform Policies in plain English include: - The Data Platform team should be allowed to manage users & groups, view platform analytics, & manage policies themselves - John from IT should be able to invite new users -**Metadata** policies determine who can do what to which Metadata Entities. For example: +## Metadata + +Metadata policies determine who can do what to which Metadata Entities. For example: - Who can edit Dataset Documentation & Links? - Who can add Owners to a Chart? @@ -51,17 +55,14 @@ A few **Metadata** Policies in plain English include: Each of these can be implemented by constructing DataHub Access Policies. -## Access Policies Setup, Prerequisites, and Permissions - -What you need to manage Access Policies on DataHub: +## Using Access Policies +:::note Required Access * **Manage Policies** Privilege This Platform Privilege allows users to create, edit, and remove all Access Policies on DataHub. Therefore, it should only be given to those users who will be serving as Admins of the platform. The default `Admin` role has this Privilege. - - -## Using Access Policies +::: Policies can be created by first navigating to **Settings > Permissions > Policies**. @@ -270,10 +271,5 @@ Policies only affect REST APIs when the environment variable `REST_API_AUTHORIZA Policies are the lowest level primitive for granting Privileges to users on DataHub. Roles are built for convenience on top of Policies. Roles grant Privileges to actors indirectly, driven by Policies -behind the scenes. Both can be used in conjunction to grant Privileges to end users. - - - -### Related Features - -- [Roles](./roles.md) \ No newline at end of file +behind the scenes. 
Both can be used in conjunction to grant Privileges to end users. For more information on roles +please refer to [Authorization > Roles](./roles.md). diff --git a/docs/authorization/policies.md b/docs/authorization/policies.md index 91b0241c7d5149..45d0b59e408337 100644 --- a/docs/authorization/policies.md +++ b/docs/authorization/policies.md @@ -49,14 +49,23 @@ and so on. A Metadata Policy can be broken down into 3 parts: -1. **Actors**: The 'who'. Specific users, groups that the policy applies to. +1. **Resources**: The 'which'. Resources that the policy applies to, e.g. "All Datasets". 2. **Privileges**: The 'what'. What actions are being permitted by a policy, e.g. "Add Tags". -3. **Resources**: The 'which'. Resources that the policy applies to, e.g. "All Datasets". +3. **Actors**: The 'who'. Specific users, groups that the policy applies to. -#### Actors +#### Resources + +Resources can be associated with the policy in a number of ways. -We currently support 3 ways to define the set of actors the policy applies to: a) list of users b) list of groups, and -c) owners of the entity. You also have the option to apply the policy to all users or groups. +1. List of resource types - The entity's type for example: dataset, chart, dashboard +2. List of resource URNs +3. List of tags +4. List of domains + +:::note Important Note +The associations in the list above are an *intersection* or an _AND_ operation. For example, if the policy targets +`1. resource type: dataset` and `3. resources tagged: 'myTag'`, it will apply to datasets that are tagged with tag 'myTag'. +::: #### Privileges @@ -64,55 +73,163 @@ Check out the list of privileges [here](https://github.com/datahub-project/datahub/blob/master/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java) . Note, the privileges are semantic by nature, and does not tie in 1-to-1 with the aspect model. 
-All edits on the UI are covered by a privilege, to make sure we have the ability to restrict write access. +All edits on the UI are covered by a privilege, to make sure we have the ability to restrict write access. See the +[Reference](#reference) section below. + +#### Actors + +We currently support 3 ways to define the set of actors the policy applies to: + +1. list of users (or all users) +2. list of groups (or all groups) +3. owners of the entity + +:::note Important Note +Unlike resources, the definitions for actors are a union of the actors. For example, if user `1. Alice` is associated +with the policy as well as `3. owners of the entity`, this means that Alice _OR_ any owner of +the targeted resource(s) will be included in the policy. +::: + +## Managing Policies + +Policies can be managed on the **Settings > Permissions > Policies** page. The `Policies` tab will only +be visible to those users having the `Manage Policies` privilege. -We currently support the following: +Out of the box, DataHub is deployed with a set of pre-baked Policies. The set of default policies is created at deploy +time and can be found inside the `policies.json` file within `metadata-service/war/src/main/resources/boot`. This set of policies serves the +following purposes: + +1. Assigns immutable super-user privileges for the root `datahub` user account (Immutable) +2. Assigns all Platform privileges for all Users by default (Editable) + +The reason for #1 is to prevent people from accidentally deleting all policies and getting locked out (the `datahub` super user account can be a backup). +The reason for #2 is to permit administrators to log in via OIDC or another means outside of the `datahub` root account +when they are bootstrapping with DataHub. This way, those setting up DataHub can start managing policies without friction. +Note that these privileges *can* and likely *should* be altered inside the **Policies** page of the UI. 
+ +:::note Pro-Tip +To log in using the `datahub` account, simply navigate to `/login` and enter `datahub`, `datahub`. Note that the password can be customized for your +deployment by changing the `user.props` file within the `datahub-frontend` module. Notice that JaaS authentication must be enabled. +::: + +## Configuration + +By default, the Policies feature is *enabled*. This means that the deployment will support creating, editing, removing, and +most importantly enforcing fine-grained access policies. + +In some cases, these capabilities are not desirable. For example, if your company's users are already used to having free rein, you +may want to keep it that way. Or perhaps it is only your Data Platform team who actively uses DataHub, in which case Policies may be overkill. + +For these scenarios, we've provided a back door to disable Policies in your deployment of DataHub. This will completely hide +the policies management UI and by default will allow all actions on the platform. It will be as though +each user has *all* privileges, both of the **Platform** & **Metadata** flavor. + +To disable Policies, you can simply set the `AUTH_POLICIES_ENABLED` environment variable for the `datahub-gms` service container +to `false`. For example in your `docker/datahub-gms/docker.env`, you'd place + +``` +AUTH_POLICIES_ENABLED=false +``` + +### REST API Authorization + +Policies only affect REST APIs when the environment variable `REST_API_AUTHORIZATION_ENABLED` is set to `true` for GMS. Some policies only apply when this setting is enabled, marked below, and other Metadata and Platform policies apply to the APIs where relevant, also specified in the tables below. + +## Reference + +For a complete list of privileges, see the +privileges [here](https://github.com/datahub-project/datahub/blob/master/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java). 
+ +### Platform-level privileges -##### Platform-level privileges These privileges are for DataHub operators to access & manage the administrative functionality of the system. -| Platform Privileges | Description | -|-----------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Generate Personal Access Tokens | Allow actor to generate personal access tokens for use with DataHub APIs. | -| Manage Domains | Allow actor to create and remove Asset Domains. | -| Manage Home Page Posts | Allow actor to create and delete home page posts | -| Manage Glossaries | Allow actor to create, edit, and remove Glossary Entities | -| Manage Tags | Allow actor to create and remove Tags. | -| Manage Business Attribute | Allow actor to create, update, delete Business Attribute | -| Manage Documentation Forms | Allow actor to manage forms assigned to assets to assist in documentation efforts. | -| Manage Policies | Allow actor to create and remove access control policies. Be careful - Actors with this privilege are effectively super users. | -| Manage Metadata Ingestion | Allow actor to create, remove, and update Metadata Ingestion sources. | -| Manage Secrets | Allow actor to create & remove Secrets stored inside DataHub. | -| Manage Users & Groups | Allow actor to create, remove, and update users and groups on DataHub. | -| View Analytics | Allow actor to view the DataHub analytics dashboard. | -| Manage All Access Tokens | Allow actor to create, list and revoke access tokens on behalf of users in DataHub. Be careful - Actors with this privilege are effectively super users that can impersonate other users. 
| -| Manage User Credentials | Allow actor to manage credentials for native DataHub users, including inviting new users and resetting passwords | -| Manage Public Views | Allow actor to create, update, and delete any Public (shared) Views. | -| Manage Ownership Types | Allow actor to create, update and delete Ownership Types. | -| Create Business Attribute | Allow actor to create new Business Attribute. | -| Manage Connections | Allow actor to manage connections to external DataHub platforms. | -| Restore Indices API[^1] | Allow actor to use the Restore Indices API. | -| Get Timeseries index sizes API[^1] | Allow actor to use the get Timeseries indices size API. | -| Truncate timeseries aspect index size API[^1] | Allow actor to use the API to truncate a timeseries index. | -| Get ES task status API[^1] | Allow actor to use the get task status API for an ElasticSearch task. | -| Enable/Disable Writeability API[^1] | Allow actor to enable or disable GMS writeability for data migrations. | -| Apply Retention API[^1] | Allow actor to apply retention using the API. | -| Analytics API access[^1] | Allow actor to use API read access to raw analytics data. | -| Manage Tests[^2] | Allow actor to create and remove Asset Tests. | -| View Metadata Proposals[^2] | Allow actor to view the requests tab for viewing metadata proposals. | -| Create metadata constraints[^2] | Allow actor to create metadata constraints. | -| Manage Platform Settings[^2] | Allow actor to view and change platform-level settings, like integrations & notifications. | -| Manage Monitors[^2] | Allow actor to create, update, and delete any data asset monitors, including Custom SQL monitors. Grant with care. 
| +#### Access & Credentials + +| Platform Privileges | Description | +|--------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Generate Personal Access Tokens | Allow actor to generate personal access tokens for use with DataHub APIs. | +| Manage Policies | Allow actor to create and remove access control policies. Be careful - Actors with this privilege are effectively super users. | +| Manage Secrets | Allow actor to create & remove Secrets stored inside DataHub. | +| Manage Users & Groups | Allow actor to create, remove, and update users and groups on DataHub. | +| Manage All Access Tokens | Allow actor to create, list and revoke access tokens on behalf of users in DataHub. Be careful - Actors with this privilege are effectively super users that can impersonate other users. | +| Manage User Credentials | Allow actor to manage credentials for native DataHub users, including inviting new users and resetting passwords | +| Manage Connections | Allow actor to manage connections to external DataHub platforms. | + +#### Product Features + +| Platform Privileges | Description | +|-------------------------------------|--------------------------------------------------------------------------------------------------------------------| +| Manage Home Page Posts | Allow actor to create and delete home page posts | +| Manage Business Attribute | Allow actor to create, update, delete Business Attribute | +| Manage Documentation Forms | Allow actor to manage forms assigned to assets to assist in documentation efforts. | +| Manage Metadata Ingestion | Allow actor to create, remove, and update Metadata Ingestion sources. | +| Manage Features | Umbrella privilege to manage all features. | +| View Analytics | Allow actor to view the DataHub analytics dashboard. 
| +| Manage Public Views | Allow actor to create, update, and delete any Public (shared) Views. | +| Manage Ownership Types | Allow actor to create, update and delete Ownership Types. | +| Create Business Attribute | Allow actor to create new Business Attribute. | +| Manage Structured Properties | Manage structured properties in your instance. | +| View Tests | View Asset Tests. | +| Manage Tests[^2] | Allow actor to create and remove Asset Tests. | +| View Metadata Proposals[^2] | Allow actor to view the requests tab for viewing metadata proposals. | +| Create metadata constraints[^3] | Allow actor to create metadata constraints. | +| Manage Platform Settings[^2] | Allow actor to view and change platform-level settings, like integrations & notifications. | +| Manage Monitors[^2] | Allow actor to create, update, and delete any data asset monitors, including Custom SQL monitors. Grant with care. | [^1]: Only active if REST_API_AUTHORIZATION_ENABLED is true [^2]: DataHub Cloud only +[^3]: Deprecated feature + +#### Entity Management + +| Platform Privileges | Description | +|-------------------------------------|------------------------------------------------------------------------------------| +| Manage Domains | Allow actor to create and remove Asset Domains. | +| Manage Glossaries | Allow actor to create, edit, and remove Glossary Entities | +| Manage Tags | Allow actor to create and remove Tags. | + +#### System Management + +| Platform Privileges | Description | +|-----------------------------------------------|--------------------------------------------------------------------------| +| Restore Indices API[^1] | Allow actor to use the Restore Indices API. | +| Get Timeseries index sizes API[^1] | Allow actor to use the get Timeseries indices size API. | +| Truncate timeseries aspect index size API[^1] | Allow actor to use the API to truncate a timeseries index. 
| +| Get ES task status API[^1] | Allow actor to use the get task status API for an ElasticSearch task. | +| Enable/Disable Writeability API[^1] | Allow actor to enable or disable GMS writeability for data migrations. | +| Apply Retention API[^1] | Allow actor to apply retention using the API. | +| Analytics API access[^1] | Allow actor to use API read access to raw analytics data. | + +[^1]: Only active if REST_API_AUTHORIZATION_ENABLED is true +[^2]: DataHub Cloud only + +### Common Metadata Privileges -##### Common metadata privileges These privileges are to view & modify any entity within DataHub. -| Common Privileges | Description | +#### Entity Privileges + +| Entity Privileges | Description | |-------------------------------------|--------------------------------------------------------------------------------------------| | View Entity Page | Allow actor to view the entity page. | +| Edit Entity | Allow actor to edit any information about an entity. Super user privileges for the entity. | +| Delete | Allow actor to delete this entity. | +| Create Entity | Allow actor to create an entity if it doesn't exist. | +| Entity Exists | Allow actor to determine whether the entity exists. | +| Get Timeline API[^1] | Allow actor to use the GET Timeline API. | +| Get Entity + Relationships API[^1] | Allow actor to use the GET Entity and Relationships API. | +| Get Aspect/Entity Count APIs[^1] | Allow actor to use the GET Aspect/Entity Count APIs. | +| View Entity[^2] | Allow actor to view the entity in search results. | +| Share Entity[^2] | Allow actor to share an entity with another DataHub Cloud instance. | + +[^1]: Only active if REST_API_AUTHORIZATION_ENABLED is true +[^2]: DataHub Cloud only + +#### Aspect Privileges + +| Aspect Privileges | Description | +|-------------------------------------|--------------------------------------------------------------------------------------------| | Edit Tags | Allow actor to add and remove tags to an asset. 
| | Edit Glossary Terms | Allow actor to add and remove glossary terms to an asset. | | Edit Description | Allow actor to edit the description (documentation) of an entity. | @@ -122,35 +239,57 @@ These privileges are to view & modify any entity within DataHub. | Edit Data Product | Allow actor to edit the Data Product of an entity. | | Edit Deprecation | Allow actor to edit the Deprecation status of an entity. | | Edit Incidents | Allow actor to create and remove incidents for an entity. | -| Edit Entity | Allow actor to edit any information about an entity. Super user privileges for the entity. | | Edit Lineage | Allow actor to add and remove lineage edges for this entity. | | Edit Properties | Allow actor to edit the properties for an entity. | | Edit Owners | Allow actor to add and remove owners of an entity. | -| Delete | Allow actor to delete this entity. | -| Search API[^1] | Allow actor to access search APIs. | -| Get Aspect/Entity Count APIs[^1] | Allow actor to use the GET Aspect/Entity Count APIs. | | Get Timeseries Aspect API[^1] | Allow actor to use the GET Timeseries Aspect API. | -| Get Entity + Relationships API[^1] | Allow actor to use the GET Entity and Relationships API. | -| Get Timeline API[^1] | Allow actor to use the GET Timeline API. | + +[^1]: Only active if REST_API_AUTHORIZATION_ENABLED is true +[^2]: DataHub Cloud only + +#### Proposals + +| Proposals Privileges | Description | +|------------------------------------|--------------------------------------------------------------------------------------------| +| Propose Tags[^2] | Allow actor to propose adding a tag to an asset. | +| Propose Glossary Terms[^2] | Allow actor to propose adding a glossary term to an asset. | +| Propose Documentation[^2] | Allow actor to propose updates to an asset's documentation. | +| Manage Tag Proposals[^2] | Allow actor to manage a proposal to add a tag to an asset. 
| +| Manage Glossary Term Proposals[^2] | Allow actor to manage a proposal to add a glossary term to an asset. | +| Manage Documentation Proposals[^2] | Allow actor to manage a proposal to update an asset's documentation | + +[^1]: Only active if REST_API_AUTHORIZATION_ENABLED is true +[^2]: DataHub Cloud only + +#### System Management + +| System Privileges | Description | +|-------------------------------------|--------------------------------------------------------------------------------------------| | Explain ElasticSearch Query API[^1] | Allow actor to use the Operations API explain endpoint. | | Produce Platform Event API[^1] | Allow actor to produce Platform Events using the API. | -| Create Entity | Allow actor to create an entity if it doesn't exist. | -| Entity Exists | Allow actor to determine whether the entity exists. | -| View Entity[^2] | Allow actor to view the entity in search results. | -| Propose Tags[^2] | Allow actor to propose adding a tag to an asset. | -| Propose Glossary Terms[^2] | Allow actor to propose adding a glossary term to an asset. | -| Propose Documentation[^2] | Allow actor to propose updates to an asset's documentation. | -| Manage Tag Proposals[^2] | Allow actor to manage a proposal to add a tag to an asset. | -| Manage Glossary Term Proposals[^2] | Allow actor to manage a proposal to add a glossary term to an asset. | -| Manage Documentation Proposals[^2] | Allow actor to manage a proposal update an asset's documentation | -| Share Entity[^2] | Allow actor to share an entity with another DataHub Cloud instance. | [^1]: Only active if REST_API_AUTHORIZATION_ENABLED is true [^2]: DataHub Cloud only -##### Specific entity-level privileges +### Specific Entity-level Privileges These privileges are not generalizable. 
+#### Users & Groups + +| Entity | Privilege | Description | +|--------------|-------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Group | Edit Group Members | Allow actor to add and remove members to a group. | +| Group | Manage Group Notification Settings[^2] | Allow actor to manage notification settings for a group. | +| Group | Manage Group Subscriptions[^2] | Allow actor to manage subscriptions for a group. | +| Group | Edit Contact Information | Allow actor to change the contact information such as email & chat handles. | +| User | Edit Contact Information | Allow actor to change the contact information such as email & chat handles. | +| User | Edit User Profile | Allow actor to change the user's profile including display name, bio, title, profile image, etc. | + +[^1]: Only active if REST_API_AUTHORIZATION_ENABLED is true +[^2]: DataHub Cloud only + +#### Dataset + | Entity | Privilege | Description | |--------------|-------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | Dataset | View Dataset Usage | Allow actor to access dataset usage information (includes usage statistics and queries). | @@ -174,101 +313,22 @@ These privileges are not generalizable. | Domain | Manage Data Products | Allow actor to create, edit, and delete Data Products within a Domain | | GlossaryNode | Manage Direct Glossary Children | Allow actor to create and delete the direct children of this entity. | | GlossaryNode | Manage All Glossary Children | Allow actor to create and delete everything underneath this entity. | -| Group | Edit Group Members | Allow actor to add and remove members to a group. 
| -| Group | Manage Group Notification Settings[^2] | Allow actor to manage notification settings for a group. | -| Group | Manage Group Subscriptions[^2] | Allow actor to manage subscriptions for a group. | -| Group | Edit Contact Information | Allow actor to change the contact information such as email & chat handles. | -| User | Edit Contact Information | Allow actor to change the contact information such as email & chat handles. | -| User | Edit User Profile | Allow actor to change the user's profile including display name, bio, title, profile image, etc. | - -#### Resources - -Resource filter defines the set of resources that the policy applies to is defined using a list of criteria. Each -criterion defines a field type (like type, urn, domain), a list of field values to compare, and a -condition (like EQUALS). It essentially checks whether the field of a certain resource matches any of the input values. -Note, that if there are no criteria or resource is not set, policy is applied to ALL resources. - -For example, the following resource filter will apply the policy to datasets, charts, and dashboards under domain 1. - -```json -{ - "resources": { - "filter": { - "criteria": [ - { - "field": "TYPE", - "condition": "EQUALS", - "values": [ - "dataset", - "chart", - "dashboard" - ] - }, - { - "field": "DOMAIN", - "values": [ - "urn:li:domain:domain1" - ], - "condition": "EQUALS" - } - ] - } - } -} -``` -Where `resources` is inside the `info` aspect of a Policy. - -Supported fields are as follows - -| Field Type | Description | Example | -|---------------|------------------------|-------------------------| -| type | Type of the resource | dataset, chart, dataJob | -| urn | Urn of the resource | urn:li:dataset:... | -| domain | Domain of the resource | urn:li:domain:domainX | - -## Managing Policies - -Policies can be managed on the page **Settings > Permissions > Policies** page. 
The `Policies` tab will only -be visible to those users having the `Manage Policies` privilege. - -Out of the box, DataHub is deployed with a set of pre-baked Policies. The set of default policies are created at deploy -time and can be found inside the `policies.json` file within `metadata-service/war/src/main/resources/boot`. This set of policies serves the -following purposes: - -1. Assigns immutable super-user privileges for the root `datahub` user account (Immutable) -2. Assigns all Platform privileges for all Users by default (Editable) - -The reason for #1 is to prevent people from accidentally deleting all policies and getting locked out (`datahub` super user account can be a backup) -The reason for #2 is to permit administrators to log in via OIDC or another means outside of the `datahub` root account -when they are bootstrapping with DataHub. This way, those setting up DataHub can start managing policies without friction. -Note that these privilege *can* and likely *should* be altered inside the **Policies** page of the UI. - -> Pro-Tip: To login using the `datahub` account, simply navigate to `/login` and enter `datahub`, `datahub`. Note that the password can be customized for your -deployment by changing the `user.props` file within the `datahub-frontend` module. Notice that JaaS authentication must be enabled. - -## Configuration - -By default, the Policies feature is *enabled*. This means that the deployment will support creating, editing, removing, and -most importantly enforcing fine-grained access policies. - -In some cases, these capabilities are not desirable. For example, if your company's users are already used to having free reign, you -may want to keep it that way. Or perhaps it is only your Data Platform team who actively uses DataHub, in which case Policies may be overkill. -For these scenarios, we've provided a back door to disable Policies in your deployment of DataHub. 
This will completely hide -the policies management UI and by default will allow all actions on the platform. It will be as though -each user has *all* privileges, both of the **Platform** & **Metadata** flavor. -To disable Policies, you can simply set the `AUTH_POLICIES_ENABLED` environment variable for the `datahub-gms` service container -to `false`. For example in your `docker/datahub-gms/docker.env`, you'd place +[^1]: Only active if REST_API_AUTHORIZATION_ENABLED is true +[^2]: DataHub Cloud only -``` -AUTH_POLICIES_ENABLED=false -``` +#### Misc -### REST API Authorization - -Policies only affect REST APIs when the environment variable `REST_API_AUTHORIZATION` is set to `true` for GMS. Some policies only apply when this setting is enabled, marked above, and other Metadata and Platform policies apply to the APIs where relevant, also specified in the table above. +| Entity | Privilege | Description | +|--------------|-------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Tag | Edit Tag Color | Allow actor to change the color of a Tag. | +| Domain | Manage Data Products | Allow actor to create, edit, and delete Data Products within a Domain | +| GlossaryNode | Manage Direct Glossary Children | Allow actor to create and delete the direct children of this entity. | +| GlossaryNode | Manage All Glossary Children | Allow actor to create and delete everything underneath this entity. | +[^1]: Only active if REST_API_AUTHORIZATION_ENABLED is true +[^2]: DataHub Cloud only ## Coming Soon @@ -278,7 +338,7 @@ The DataHub team is hard at work trying to improve the Policies feature. We are Under consideration -- Ability to define Metadata Policies against multiple reosurces scoped to particular "Containers" (e.g. 
A "schema", "database", or "collection") +- Ability to define Metadata Policies against multiple resources scoped to particular "Containers" (e.g. A "schema", "database", or "collection") ## Feedback / Questions / Concerns diff --git a/docs/authorization/roles.md b/docs/authorization/roles.md index 7c7b4581faffca..a1719438d2941b 100644 --- a/docs/authorization/roles.md +++ b/docs/authorization/roles.md @@ -156,10 +156,12 @@ These privileges are only relevant to DataHub Cloud. |-----------------------------|--------------------|--------------------|--------|-----------------------------------------------------------------------------------------------------| | Manage Tests | :heavy_check_mark: | :heavy_check_mark: | :x: | Create and remove Asset Tests. | | View Metadata Proposals | :heavy_check_mark: | :heavy_check_mark: | :x: | View the requests tab for viewing metadata proposals. | -| Create metadata constraints | :heavy_check_mark: | :heavy_check_mark: | :x: | Create metadata constraints. | +| Create metadata constraints[^1] | :heavy_check_mark: | :heavy_check_mark: | :x: | Create metadata constraints. | | Manage Platform Settings | :heavy_check_mark: | :x: | :x: | View and change platform-level settings, like integrations & notifications. | | Manage Monitors | :heavy_check_mark: | :x: | :x: | Create, update, and delete any data asset monitors, including Custom SQL monitors. Grant with care. | +[^1]: Deprecated feature + ##### Metadata Privileges | Privilege | Admin | Editor | Reader | Description | diff --git a/docs/automation/docs-propagation.md b/docs/automation/docs-propagation.md new file mode 100644 index 00000000000000..a637afcde4dca7 --- /dev/null +++ b/docs/automation/docs-propagation.md @@ -0,0 +1,128 @@ +# Documentation Propagation Automation + +## Introduction + +Documentation Propagation is an automation that automatically propagates column and asset (coming soon) descriptions based on downstream column-level lineage and sibling relationships. 
+It simplifies metadata management by ensuring consistency and reducing the manual effort required for documenting data assets to aid +in Data Governance & Compliance along with Data Discovery. + +This feature is enabled by default in Open Source DataHub. + +## Capabilities + +### Open Source +- **Column-Level Docs Propagation**: Automatically propagate documentation to downstream columns and sibling columns that are derived or dependent on the source column. +- **(Coming Soon) Asset-Level Docs Propagation**: Propagate descriptions to sibling assets. + +### DataHub Cloud (Acryl) +- Includes all the features of Open Source. +- **Propagation Rollback (Undo)**: Offers the ability to undo any propagation changes, providing a safety net against accidental updates. +- **Historical Backfilling**: Automatically backfills historical data for newly documented columns to maintain consistency across time. + +### Comparison of Features + +| Feature | Open Source | DataHub Cloud | +|---------------------------------|-------------|---------------| +| Column-Level Docs Propagation | ✔️ | ✔️ | +| Asset-Level Docs Propagation | ✔️ | ✔️ | +| Downstream Lineage + Siblings | ✔️ | ✔️ | +| Propagation Rollback (Undo) | ❌ | ✔️ | +| Historical Backfilling | ❌ | ✔️ | + +## Enabling Documentation Propagation + +### In Open Source + +Notice that the user must have the `Manage Ingestion` permission to view and enable the feature. + +1. **Navigate to Settings**: Click on the 'Settings' gear in top navigation bar. + +

    + +

    + +2. **Navigate to Features**: Click on the 'Features' tab in the left-hand navigation bar. + +

    + +

    + +3. **Enable Documentation Propagation**: Locate the 'Documentation Propagation' section and toggle the feature to enable it for column-level and asset-level propagation. +Currently, Column Level propagation is supported, with asset level propagation coming soon. + +

    + +

    + + +### In DataHub Cloud + +1. **Navigate to Automations**: Click on 'Govern' > 'Automations' in the navigation bar. + +

    + +

    + +2. **Create An Automation**: Click on 'Create' and select 'Column Documentation Propagation'. + +

    + +

    + +3. **Configure Automation**: Fill in the required fields, such as the name, description, and category. Finally, click 'Save and Run' to start the automation + +

    + +

    + +## Propagating for Existing Assets (DataHub Cloud Only) + +In DataHub Cloud, you can back-fill historical data for existing assets to ensure that all existing column descriptions are propagated to downstreams +when you start the automation. Note that it may take some time to complete the initial back-filling process, depending on the number of assets and the complexity of your lineage. + +To do this, navigate to the Automation you created in Step 3 above, click the 3-dot "more" menu: + +

    + +

    + +and then click "Initialize". + +

    + +

    + +This one-time step will kick off the back-filling process for existing descriptions. If you only want to begin propagating +descriptions going forward, you can skip this step. + +## Rolling Back Propagated Descriptions (DataHub Cloud Only) + +In DataHub Cloud, you can rollback all descriptions that have been propagated historically. + +This feature allows you to "clean up" or "undo" any accidental propagation that may have occurred automatically, in the case +that you no longer want propagated descriptions to be visible. + +To do this, navigate to the Automation you created in Step 3 above, click the 3-dot "More" menu + +

    + +

    + +and then click "Rollback". + +

    + +

    + +This one-time step will remove all propagated descriptions. To simply stop propagating new descriptions, you can disable the automation. + +## Viewing Propagated Descriptions + +Once the automation is enabled, you'll be able to recognize propagated descriptions as those with the thunderbolt icon next to them: + +The tooltip will provide additional information, including where the description originated and any intermediate hops that were +used to propagate the description. + +

    + +

    \ No newline at end of file diff --git a/docs/automation/snowflake-tag-propagation.md b/docs/automation/snowflake-tag-propagation.md new file mode 100644 index 00000000000000..bdc80376dfb484 --- /dev/null +++ b/docs/automation/snowflake-tag-propagation.md @@ -0,0 +1,88 @@ + +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + +# Snowflake Tag Propagation Automation + + + +## Introduction + +Snowflake Tag Propagation is an automation that allows you to sync DataHub Glossary Terms and Tags on +both columns and tables back to Snowflake. This automation is available in DataHub Cloud (Acryl) only. + +## Capabilities + +- Automatically Add DataHub Glossary Terms to Snowflake Tables and Columns +- Automatically Add DataHub Tags to Snowflake Tables and Columns +- Automatically Remove DataHub Glossary Terms and Tags from Snowflake Tables and Columns when they are removed in DataHub + +## Enabling Snowflake Tag Sync + +1. **Navigate to Automations**: Click on 'Govern' > 'Automations' in the navigation bar. + +

    + +

    + +2. **Create An Automation**: Click on 'Create' and select 'Snowflake Tag Propagation'. + +

    + +

    + +3. **Configure Automation**: Fill in the required fields to connect to Snowflake, along with the name, description, and category. +Note that you can limit propagation based on specific Tags and Glossary Terms. If none are selected, then ALL Tags or Glossary Terms will be automatically +propagated to Snowflake tables and columns. Finally, click 'Save and Run' to start the automation + +

    + +

    + +## Propagating for Existing Assets + +You can back-fill historical data for existing assets to ensure that all existing column and table Tags and Glossary Terms are propagated to Snowflake. +Note that it may take some time to complete the initial back-filling process, depending on the number of Snowflake assets you have. + +To do so, navigate to the Automation you created in Step 3 above, click the 3-dot "More" menu + +

    + +

    + +and then click "Initialize". + +

    + +

    + +This one-time step will kick off the back-filling process for existing Tags and Glossary Terms. If you only want to begin propagating +Tags and Glossary Terms going forward, you can skip this step. + +## Rolling Back Propagated Tags + +You can rollback all tags and glossary terms that have been propagated historically. + +This feature allows you to "clean up" or "undo" any accidental propagation that may have occurred automatically, in the case +that you no longer want propagated Tags and Glossary Terms to be visible. + +To do this, navigate to the Automation you created in Step 3 above, click the 3-dot "More" menu + +

    + +

    + +and then click "Rollback". + +

    + +

    + +This one-time step will remove all propagated tags and glossary terms from Snowflake. To simply stop propagating new tags, you can disable the automation. + +## Viewing Propagated Tags + +You can view propagated Tags (and corresponding DataHub URNs) inside the Snowflake UI to confirm the automation is working as expected. + +

    + +

    diff --git a/docs/developers.md b/docs/developers.md index 0c9d7bee3d79f2..401169490dd4b6 100644 --- a/docs/developers.md +++ b/docs/developers.md @@ -46,7 +46,7 @@ Use [gradle wrapper](https://docs.gradle.org/current/userguide/gradle_wrapper.ht ./gradlew build ``` -Note that the above will also run run tests and a number of validations which makes the process considerably slower. +Note that the above will also run tests and a number of validations which makes the process considerably slower. We suggest partially compiling DataHub according to your needs: diff --git a/docs/features/feature-guides/documentation-forms.md b/docs/features/feature-guides/documentation-forms.md index b007892e660946..2edeb8ce302d77 100644 --- a/docs/features/feature-guides/documentation-forms.md +++ b/docs/features/feature-guides/documentation-forms.md @@ -101,7 +101,7 @@ You sure can! Please keep in mind that an Asset will only be considered Document ### API Tutorials -- [Create a Documentation Form](../../../docs/api/tutorials/forms.md) +- [API Guides on Documentation Form](../../../docs/api/tutorials/forms.md) :::note You must create a Structured Property before including it in a Documentation Form. diff --git a/docs/how/kafka-config.md b/docs/how/kafka-config.md index 2f20e8b548f835..06c7418f167136 100644 --- a/docs/how/kafka-config.md +++ b/docs/how/kafka-config.md @@ -116,6 +116,27 @@ We've included an environment variable to customize the consumer group id, if yo - `KAFKA_CONSUMER_GROUP_ID`: The name of the kafka consumer's group id. +#### datahub-mae-consumer MCL Hooks + +By default, all MetadataChangeLog processing hooks execute as part of the same kafka consumer group based on the +previously mentioned `KAFKA_CONSUMER_GROUP_ID`. + +The various MCL Hooks could also be separated into separate groups which allows for controlling parallelization and +prioritization of the hooks. + +For example, the `UpdateIndicesHook` and `SiblingsHook` processing can be delayed by other hooks. 
Separating these +hooks into their own group can reduce latency from these other hooks. The `application.yaml` configuration +includes options for assigning a suffix to the consumer group, see `consumerGroupSuffix`. + +| Environment Variable | Default | Description | +|------------------------------------------------|---------|---------------------------------------------------------------------------------------------| +| SIBLINGS_HOOK_CONSUMER_GROUP_SUFFIX | '' | Siblings processing hook. Considered one of the primary hooks in the `datahub-mae-consumer` | +| UPDATE_INDICES_CONSUMER_GROUP_SUFFIX | '' | Primary processing hook. | +| INGESTION_SCHEDULER_HOOK_CONSUMER_GROUP_SUFFIX | '' | Scheduled ingestion hook. | +| INCIDENTS_HOOK_CONSUMER_GROUP_SUFFIX | '' | Incidents hook. | +| ECE_CONSUMER_GROUP_SUFFIX | '' | Entity Change Event hook which publishes to the Platform Events topic. | +| FORMS_HOOK_CONSUMER_GROUP_SUFFIX | '' | Forms processing. | + ## Applying Configurations ### Docker diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 08ababcb5cfce9..2443375099b7b2 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -20,6 +20,16 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ### Breaking Changes +### Potential Downtime + +### Deprecations + +### Other Notable Changes + +## 0.14.0 + +### Breaking Changes + - Protobuf CLI will no longer create binary encoded protoc custom properties. Flag added `-protocProp` in case this behavior is required. - #10814 Data flow info and data job info aspect will produce an additional field that will require a corresponding upgrade of server. Otherwise server can reject the aspects. 
diff --git a/docs/lineage/airflow.md b/docs/lineage/airflow.md index 2d7707637e2d1c..65da1fd5251dc9 100644 --- a/docs/lineage/airflow.md +++ b/docs/lineage/airflow.md @@ -18,7 +18,7 @@ There's two actively supported implementations of the plugin, with different Air | Approach | Airflow Version | Notes | | --------- | --------------- | --------------------------------------------------------------------------- | | Plugin v2 | 2.3.4+ | Recommended. Requires Python 3.8+ | -| Plugin v1 | 2.1+ | No automatic lineage extraction; may not extract lineage if the task fails. | +| Plugin v1 | 2.1 - 2.8 | No automatic lineage extraction; may not extract lineage if the task fails. | If you're using Airflow older than 2.1, it's possible to use the v1 plugin with older versions of `acryl-datahub-airflow-plugin`. See the [compatibility section](#compatibility) for more details. @@ -84,7 +84,7 @@ enabled = True # default ### Installation -The v1 plugin requires Airflow 2.1+ and Python 3.8+. If you're on older versions, it's still possible to use an older version of the plugin. See the [compatibility section](#compatibility) for more details. +The v1 plugin requires Airflow 2.1 - 2.8 and Python 3.8+. If you're on older versions, it's still possible to use an older version of the plugin. See the [compatibility section](#compatibility) for more details. If you're using Airflow 2.3+, we recommend using the v2 plugin instead. If you need to use the v1 plugin with Airflow 2.3+, you must also set the environment variable `DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN=true`. 
diff --git a/docs/managed-datahub/configuring-identity-provisioning-with-okta.md b/docs/managed-datahub/configuring-identity-provisioning-with-okta.md new file mode 100644 index 00000000000000..a7939b514166da --- /dev/null +++ b/docs/managed-datahub/configuring-identity-provisioning-with-okta.md @@ -0,0 +1,119 @@ +--- +title: "SCIM Integration: Okta and DataHub" +hide_title: true +--- +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + +## SCIM Integration: Okta and DataHub + + +## Overview +This document covers the steps required to enable SCIM provisioning from Okta to DataHub. + +This document assumes you are using OIDC for SSO with DataHub. +Since Okta doesn't currently support SCIM with OIDC, you would need to create an additional SWA-app-integration to enable SCIM provisioning. + +On completing the steps in this guide, Okta will start automatically pushing changes to users/groups of this SWA-app-integration to DataHub, thereby simplifying provisioning of users/groups in DataHub. + +### Why SCIM provisioning? +Let us look at an example of the flows enabled through SCIM provisioning. + +Consider the following configuration in Okta +- A group `governance-team` +- And it has two members `john` and `sid` +- And the group has role `Reader` + +Through SCIM provisioning, the following are enabled: +* If the `governance-team` group is assigned to the DataHub app in Okta with the role `Reader`, Okta will create the users `john` and `sid` in DataHub with the `Reader` role. +* If you remove `john` from group `governance-team` then `john` would automatically get deactivated in DataHub. +* If you remove `sid` from the DataHub app in Okta, then `sid` would automatically get deactivated in DataHub. + +Generally, any user assignment/unassignment to the app in Okta - directly or through groups - are automatically reflected in the DataHub application. 
+ +This guide also covers other variations such as how to assign a role to a user directly, and how group-information can be pushed to DataHub. + +> Only Admin, Editor and Reader roles are supported in DataHub. These roles are preconfigured/created on DataHub. + +## Configuring SCIM provisioning + +### 1. Create an SWA app integration +a). Create a new [SWA app integration](https://help.okta.com/en-us/content/topics/apps/apps_app_integration_wizard_swa.htm), called say, `DataHub-SCIM-SWA`. + +Note: this app-integration will only be used for SCIM provisioning. You would continue to use the existing OIDC-app-integration for SSO. + +b). In the `General` tab of the `DataHub-SCIM-SWA` application, check the `Enable SCIM provisioning` option + +

    + +

    + +You may also want to configure the other selections as shown in the above image, so that this application isn't visible to your users. + +### 2. Configure SCIM + +a). Generate a personal access token from [DataHub](../../docs/authentication/personal-access-tokens.md#creating-personal-access-tokens). + +b). In the `Provisioning` tab, configure the DataHub-SCIM endpoint as shown in the below image: + +

    + +

    + +**Note**: Set the value of the `Bearer` field to the personal access token obtained in step (a) above. + +c). Configure the `To App` section as shown below: + +

    + +

    + +**Note**: We are not pushing passwords to DataHub over SCIM, since we are assuming SSO with OIDC as mentioned earlier. + +### 3. Add a custom attribute to represent roles +a). Navigate to `Directory` -> `Profile Editor`, and select the user-profile of this new application. + +

    + +

    + +b). Click `Add Attribute` and define a new attribute that will be used to specify the role of a DataHub user. + +

    + +

    + +* Set value of `External name` to `roles.^[primary==true].value` +* Set value of `External namespace` to `urn:ietf:params:scim:schemas:core:2.0:User` +* Define an enumerated list of values as shown in the above image +* Mark this attribute as required +* Select `Attribute type` as `Personal` + +c). Add a similar attribute for groups i.e. repeat step (b) above, but select `Attribute Type` as `Group`. (Specify the variable name as, say, `dataHubGroupRoles`.) + +### 4. Assign users & groups to the app +Assign users and groups to the app from the `Assignments` tab: + +

    + +

    + +While assigning a user/group, choose an appropriate value for the dataHubRoles/dataHubGroupRoles attribute. +Note that when a role is selected for a group, the corresponding role is pushed for all users of that group in DataHub. + +### The provisioning setup is now complete +Once the above steps are completed, user assignments/unassignments to the DataHub-SCIM-SWA app in Okta will get reflected in DataHub automatically. + +> #### A note on user deletion +>Note that when users are unassigned or deactivated in Okta, the corresponding users in DataHub are also deactivated (marked "suspended"). +But when a user is *deleted* in Okta, the corresponding user in DataHub does *not* get deleted. +Refer to the Okta documentation on [Delete (Deprovision)](https://developer.okta.com/docs/concepts/scim/#delete-deprovision) for more details. + +### 5. (Optional): Configure push groups +When groups are assigned to the app, Okta pushes the group-members as users to DataHub, but the group itself isn't pushed. +To push group information to DataHub, configure the `Push Groups` tab accordingly as shown below: + +

    + +

    + +Refer to the Okta [Group Push](https://help.okta.com/en-us/content/topics/users-groups-profiles/app-assignments-group-push.htm) documentation for more details. \ No newline at end of file diff --git a/docs/what-is-datahub/customer-stories.md b/docs/what-is-datahub/customer-stories.md deleted file mode 100644 index 2745a8aca0d2f2..00000000000000 --- a/docs/what-is-datahub/customer-stories.md +++ /dev/null @@ -1,17 +0,0 @@ -import CustomerCardSection from '@site/src/pages/docs/_components/CustomerCardSection'; - -# Customer Stories - -Meet the DataHub users who have shared their stories with us. - -:::note Share Your DataHub Journey -We're excited to hear about your experience with DataHub. Share your story with us! - -Share Your Story - -::: - - diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/GraphRetriever.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/GraphRetriever.java index 9757a10e3f3c28..cedaac25ffee9d 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/GraphRetriever.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/GraphRetriever.java @@ -20,7 +20,7 @@ public interface GraphRetriever { * @param destinationEntityFilter * @param relationshipTypes * @param relationshipFilter - * @param sortCriterion + * @param sortCriteria * @param scrollId * @param count * @param startTimeMillis @@ -35,7 +35,7 @@ RelatedEntitiesScrollResult scrollRelatedEntities( @Nonnull Filter destinationEntityFilter, @Nonnull List relationshipTypes, @Nonnull RelationshipFilter relationshipFilter, - @Nonnull List sortCriterion, + @Nonnull List sortCriteria, @Nullable String scrollId, int count, @Nullable Long startTimeMillis, diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/Edge.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/Edge.java index ca5e6d01103848..8777be57e1bd8f 100644 --- 
a/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/Edge.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/Edge.java @@ -13,6 +13,7 @@ import java.util.Map; import java.util.Optional; import java.util.stream.Collectors; +import javax.annotation.Nonnull; import lombok.AllArgsConstructor; import lombok.Data; import lombok.EqualsAndHashCode; @@ -59,7 +60,7 @@ public Edge( null); } - public String toDocId() { + public String toDocId(@Nonnull String idHashAlgo) { StringBuilder rawDocId = new StringBuilder(); rawDocId .append(getSource().toString()) @@ -73,7 +74,7 @@ public String toDocId() { try { byte[] bytesOfRawDocID = rawDocId.toString().getBytes(StandardCharsets.UTF_8); - MessageDigest md = MessageDigest.getInstance("MD5"); + MessageDigest md = MessageDigest.getInstance(idHashAlgo); byte[] thedigest = md.digest(bytesOfRawDocID); return Base64.getEncoder().encodeToString(thedigest); } catch (NoSuchAlgorithmException e) { diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java index 4238c333615ecf..8dd642f63dd975 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java @@ -52,7 +52,7 @@ public class ConfigEntityRegistry implements EntityRegistry { private final DataSchemaFactory dataSchemaFactory; @Getter private final PluginFactory pluginFactory; - @Nullable + @Getter @Nullable private BiFunction, PluginFactory> pluginFactoryProvider; private final Map entityNameToSpec; diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java index c60f89c510cd7f..16df2d452a619e 100644 --- 
a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java @@ -22,6 +22,8 @@ import com.linkedin.metadata.aspect.patch.template.dataset.UpstreamLineageTemplate; import com.linkedin.metadata.aspect.patch.template.form.FormInfoTemplate; import com.linkedin.metadata.aspect.patch.template.structuredproperty.StructuredPropertyDefinitionTemplate; +import com.linkedin.metadata.aspect.plugins.PluginFactory; +import com.linkedin.metadata.aspect.plugins.config.PluginConfiguration; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.DefaultEntitySpec; import com.linkedin.metadata.models.EntitySpec; @@ -32,8 +34,11 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.function.BiFunction; import java.util.stream.Collectors; import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Getter; /** * Implementation of {@link EntityRegistry} that builds {@link DefaultEntitySpec} objects from the a @@ -46,6 +51,9 @@ public class SnapshotEntityRegistry implements EntityRegistry { private final AspectTemplateEngine _aspectTemplateEngine; private final Map _aspectNameToSpec; + @Getter @Nullable + private BiFunction, PluginFactory> pluginFactoryProvider; + private static final SnapshotEntityRegistry INSTANCE = new SnapshotEntityRegistry(); public SnapshotEntityRegistry() { @@ -56,6 +64,19 @@ public SnapshotEntityRegistry() { entitySpecs = new ArrayList<>(entityNameToSpec.values()); _aspectNameToSpec = populateAspectMap(entitySpecs); _aspectTemplateEngine = populateTemplateEngine(_aspectNameToSpec); + pluginFactoryProvider = null; + } + + public SnapshotEntityRegistry( + BiFunction, PluginFactory> pluginFactoryProvider) { + entityNameToSpec = + new EntitySpecBuilder() + .buildEntitySpecs(new Snapshot().schema()).stream() + .collect(Collectors.toMap(spec -> 
spec.getName().toLowerCase(), spec -> spec)); + entitySpecs = new ArrayList<>(entityNameToSpec.values()); + _aspectNameToSpec = populateAspectMap(entitySpecs); + _aspectTemplateEngine = populateTemplateEngine(_aspectNameToSpec); + this.pluginFactoryProvider = pluginFactoryProvider; } public SnapshotEntityRegistry(UnionTemplate snapshot) { diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/PluginsTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/PluginsTest.java index cecf21849f3aaa..b98df05d721ddb 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/PluginsTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/PluginsTest.java @@ -6,6 +6,7 @@ import com.datahub.test.TestEntityProfile; import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.EventSpec; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; @@ -262,23 +263,42 @@ public void testUnloadedMerge() throws EntityRegistryException { mergedEntityRegistry.apply(configEntityRegistry2); assertEquals( - mergedEntityRegistry.getAllAspectPayloadValidators().stream() - .filter(p -> p.getConfig().getSupportedOperations().contains("DELETE")) + mergedEntityRegistry + .getPluginFactory() + .getPluginConfiguration() + .getAspectPayloadValidators() + .stream() + .filter(AspectPluginConfig::isEnabled) + .filter(p -> p.getSupportedOperations().contains("DELETE")) .count(), 1); + assertEquals( - mergedEntityRegistry.getAllMutationHooks().stream() - .filter(p -> p.getConfig().getSupportedOperations().contains("DELETE")) + mergedEntityRegistry.getPluginFactory().getPluginConfiguration().getMutationHooks().stream() + .filter(AspectPluginConfig::isEnabled) + .filter(p -> 
p.getSupportedOperations().contains("DELETE")) .count(), 1); + assertEquals( - mergedEntityRegistry.getAllMCLSideEffects().stream() - .filter(p -> p.getConfig().getSupportedOperations().contains("DELETE")) + mergedEntityRegistry + .getPluginFactory() + .getPluginConfiguration() + .getMclSideEffects() + .stream() + .filter(AspectPluginConfig::isEnabled) + .filter(p -> p.getSupportedOperations().contains("DELETE")) .count(), 1); + assertEquals( - mergedEntityRegistry.getAllMCPSideEffects().stream() - .filter(p -> p.getConfig().getSupportedOperations().contains("DELETE")) + mergedEntityRegistry + .getPluginFactory() + .getPluginConfiguration() + .getMcpSideEffects() + .stream() + .filter(AspectPluginConfig::isEnabled) + .filter(p -> p.getSupportedOperations().contains("DELETE")) .count(), 1); } diff --git a/li-utils/src/main/java/com/datahub/util/RecordUtils.java b/li-utils/src/main/java/com/datahub/util/RecordUtils.java index 8183ecc21ee27b..2d0881b6984e46 100644 --- a/li-utils/src/main/java/com/datahub/util/RecordUtils.java +++ b/li-utils/src/main/java/com/datahub/util/RecordUtils.java @@ -78,6 +78,14 @@ public static String toJsonString(@Nonnull RecordTemplate recordTemplate) { } } + public static String toJsonString(@Nonnull List recordTemplates) { + StringBuilder json = new StringBuilder(); + for (RecordTemplate recordTemplate : recordTemplates) { + json.append(toJsonString(recordTemplate)); + } + return json.toString(); + } + /** * Creates a {@link RecordTemplate} object from a serialized JSON string. 
* @@ -99,6 +107,18 @@ public static T toRecordTemplate( return toRecordTemplate(type, dataMap); } + @Nonnull + public static DataMap toDataMap(@Nonnull String jsonString) { + DataMap dataMap; + try { + dataMap = DATA_TEMPLATE_CODEC.stringToMap(jsonString); + } catch (IOException e) { + throw new ModelConversionException("Failed to deserialize DataMap: " + jsonString); + } + + return dataMap; + } + /** * Creates a {@link RecordTemplate} object from a {@link DataMap}. * diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py index c87f7f8fb1a8ee..d3727e41bb378e 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py @@ -143,6 +143,20 @@ def wrapper(*args, **kwargs): return cast(_F, wrapper) +def _render_templates(task_instance: "TaskInstance") -> "TaskInstance": + # Render templates in a copy of the task instance. + # This is necessary to get the correct operator args in the extractors. + try: + task_instance_copy = copy.deepcopy(task_instance) + task_instance_copy.render_templates() + return task_instance_copy + except Exception as e: + logger.info( + f"Error rendering templates in DataHub listener. Jinja-templated variables will not be extracted correctly: {e}" + ) + return task_instance + + class DataHubListener: __name__ = "DataHubListener" @@ -360,15 +374,7 @@ def on_task_instance_running( f"DataHub listener got notification about task instance start for {task_instance.task_id}" ) - # Render templates in a copy of the task instance. - # This is necessary to get the correct operator args in the extractors. - try: - task_instance = copy.deepcopy(task_instance) - task_instance.render_templates() - except Exception as e: - logger.info( - f"Error rendering templates in DataHub listener. 
Jinja-templated variables will not be extracted correctly: {e}" - ) + task_instance = _render_templates(task_instance) # The type ignore is to placate mypy on Airflow 2.1.x. dagrun: "DagRun" = task_instance.dag_run # type: ignore[attr-defined] @@ -459,8 +465,17 @@ def on_task_instance_finish( self, task_instance: "TaskInstance", status: InstanceRunResult ) -> None: dagrun: "DagRun" = task_instance.dag_run # type: ignore[attr-defined] - task = self._task_holder.get_task(task_instance) or task_instance.task + + task_instance = _render_templates(task_instance) + + # We must prefer the task attribute, in case modifications to the task's inlets/outlets + # were made by the execute() method. + if getattr(task_instance, "task", None): + task = task_instance.task + else: + task = self._task_holder.get_task(task_instance) assert task is not None + dag: "DAG" = task.dag # type: ignore[assignment] datajob = AirflowGenerator.generate_datajob( diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py index 437c42713ea015..137cf97f69280a 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py @@ -4,6 +4,7 @@ from airflow.plugins_manager import AirflowPlugin +from datahub_airflow_plugin import __package_name__ from datahub_airflow_plugin._airflow_compat import AIRFLOW_PATCHED from datahub_airflow_plugin._airflow_shims import ( HAS_AIRFLOW_LISTENER_API, @@ -23,7 +24,10 @@ from openlineage.airflow.utils import try_import_from_string # noqa: F401 except ImportError: # If v2 plugin dependencies are not installed, we fall back to v1. - logger.debug("Falling back to v1 plugin due to missing dependencies.") + logger.warning( + "Falling back to the v1 DataHub plugin due to missing dependencies. 
" + f"Please install {__package_name__}[plugin-v2] to fix this." + ) _USE_AIRFLOW_LISTENER_INTERFACE = False diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py index ace7669bfa998e..db47f37bed562e 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py @@ -7,7 +7,6 @@ from airflow.lineage import PIPELINE_OUTLETS from airflow.models.baseoperator import BaseOperator from airflow.utils.module_loading import import_string -from cattr import structure from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult from datahub.telemetry import telemetry @@ -52,6 +51,7 @@ def get_task_inlets_advanced(task: BaseOperator, context: Any) -> Iterable[Any]: ) from airflow.lineage import AUTO + from cattr import structure # pick up unique direct upstream task_ids if AUTO is specified if AUTO.upper() in task_inlets or AUTO.lower() in task_inlets: diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/custom_operator_dag.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/custom_operator_dag.py new file mode 100644 index 00000000000000..b31226b7b4ceeb --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/custom_operator_dag.py @@ -0,0 +1,74 @@ +import logging +from datetime import datetime, timedelta +from typing import Any, List, Tuple + +from airflow import DAG +from airflow.models.baseoperator import BaseOperator + +from datahub_airflow_plugin.entities import Dataset + +logger = logging.getLogger(__name__) + + +class CustomOperator(BaseOperator): + def __init__(self, name, **kwargs): + super().__init__(**kwargs) + self.name = name + + def execute(self, context): + """ + Other code.... 
+ """ + logger.info("executing other code here") + + input_tables = ["mydb.schema.tableA", "mydb.schema.tableB"] + output_tables = ["mydb.schema.tableD"] + + inlets, outlets = self._get_sf_lineage(input_tables, output_tables) + + context["ti"].task.inlets = inlets + context["ti"].task.outlets = outlets + + @staticmethod + def _get_sf_lineage( + input_tables: List[str], output_tables: List[str] + ) -> Tuple[List[Any], List[Any]]: + """ + Get lineage tables from Snowflake. + """ + inlets: List[Dataset] = [] + outlets: List[Dataset] = [] + + for table in input_tables: + inlets.append(Dataset(platform="snowflake", name=table)) + + for table in output_tables: + outlets.append(Dataset(platform="snowflake", name=table)) + + return inlets, outlets + + +default_args = { + "owner": "airflow", + "depends_on_past": False, + "start_date": datetime(2023, 1, 1), + "email": ["jdoe@example.com"], + "email_on_failure": False, + "execution_timeout": timedelta(minutes=5), +} + + +with DAG( + "custom_operator_dag", + default_args=default_args, + description="An example dag with custom operator", + schedule_interval=None, + tags=["example_tag"], + catchup=False, + default_view="tree", +) as dag: + custom_task = CustomOperator( + task_id="custom_task_id", + name="custom_name", + dag=dag, + ) diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_custom_operator_dag.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_custom_operator_dag.json new file mode 100644 index 00000000000000..b81466930ed41a --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_custom_operator_dag.json @@ -0,0 +1,365 @@ +[ +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,custom_operator_dag,prod)", + "changeType": "UPSERT", + "aspectName": "dataFlowInfo", + "aspect": { + "json": { + "customProperties": { + "_access_control": "None", + "catchup": "False", + "description": "'An example dag with custom 
operator'", + "doc_md": "None", + "fileloc": "", + "is_paused_upon_creation": "None", + "start_date": "None", + "tags": "['example_tag']", + "timezone": "Timezone('UTC')" + }, + "externalUrl": "http://airflow.example.com/tree?dag_id=custom_operator_dag", + "name": "custom_operator_dag", + "description": "An example dag with custom operator" + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,custom_operator_dag,prod)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,custom_operator_dag,prod)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:example_tag" + } + ] + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,custom_operator_dag,prod)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:example_tag", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,custom_operator_dag,prod)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "custom_operator_dag" + } + ] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": 
"urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "['jdoe@example.com']", + "label": "'custom_task_id'", + "execution_timeout": "datetime.timedelta(seconds=300)", + "sla": "None", + "task_id": "'custom_task_id'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[]", + "outlets": "[]", + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/BaseFacet\", \"unknownItems\": [{\"name\": \"CustomOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"execution_timeout\": \"<>\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"is_setup\": false, \"is_teardown\": false, \"mapped\": false, \"operator_class\": \"custom_operator.CustomOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"custom_task_id\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=custom_operator_dag&_flt_3_task_id=custom_task_id", + "name": "custom_task_id", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [], + "outputDatasets": [], + "inputDatajobs": [], + 
"fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07a4aaeffa3875a24cccd1fec6fc7c8c", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "run_id": "manual_run_test", + "duration": "", + "start_date": "", + "end_date": "", + "execution_date": "2023-09-27 21:34:38+00:00", + "try_number": "0", + "max_tries": "0", + "external_executor_id": "None", + "state": "running", + "operator": "CustomOperator", + "priority_weight": "1", + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=custom_task_id&dag_id=custom_operator_dag&map_index=-1", + "orchestrator": "airflow", + "dag_id": "custom_operator_dag", + "task_id": "custom_task_id" + }, + "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=custom_task_id&dag_id=custom_operator_dag&map_index=-1", + "name": "custom_operator_dag_custom_task_id_manual_run_test", + "type": "BATCH_AD_HOC", + "created": { + "time": 1722943444074, + "actor": "urn:li:corpuser:datahub" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07a4aaeffa3875a24cccd1fec6fc7c8c", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "upstreamInstances": [] + } + } +}, +{ + 
"entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07a4aaeffa3875a24cccd1fec6fc7c8c", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1722943444074, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "status": "STARTED", + "attempt": 1 + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake", + "name": "mydb.schema.tableA", + "origin": "PROD" + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "['jdoe@example.com']", + "label": "'custom_task_id'", + "execution_timeout": "datetime.timedelta(seconds=300)", + "sla": "None", + "task_id": "'custom_task_id'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableB', env='PROD', platform_instance=None)]", + "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None)]", + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/BaseFacet\", \"unknownItems\": [{\"name\": \"CustomOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"execution_timeout\": \"<>\", \"executor_config\": {}, \"ignore_first_depends_on_past\": 
true, \"is_setup\": false, \"is_teardown\": false, \"mapped\": false, \"operator_class\": \"custom_operator.CustomOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"custom_task_id\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=custom_operator_dag&_flt_3_task_id=custom_task_id", + "name": "custom_task_id", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake", + "name": "mydb.schema.tableD", + "origin": "PROD" + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:example_tag" + } + ] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,PROD)", + 
"changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake", + "name": "mydb.schema.tableB", + "origin": "PROD" + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07a4aaeffa3875a24cccd1fec6fc7c8c", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1722943444263, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "airflow" + } + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_custom_operator_dag_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_custom_operator_dag_no_dag_listener.json new file mode 100644 index 00000000000000..019122600aedbc --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_custom_operator_dag_no_dag_listener.json @@ -0,0 +1,404 @@ +[ +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,custom_operator_dag,prod)", + "changeType": "UPSERT", + "aspectName": "dataFlowInfo", + "aspect": { + "json": { + "customProperties": { + "_access_control": "None", + "catchup": "False", + "description": "'An example dag with custom operator'", + "doc_md": "None", + "fileloc": "", + "is_paused_upon_creation": "None", + "start_date": "None", + "tags": "['example_tag']", + "timezone": "Timezone('UTC')" + }, + "externalUrl": "http://airflow.example.com/tree?dag_id=custom_operator_dag", + "name": "custom_operator_dag", + "description": "An example dag with custom operator" + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,custom_operator_dag,prod)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": 
"urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,custom_operator_dag,prod)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:example_tag" + } + ] + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,custom_operator_dag,prod)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:example_tag", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,custom_operator_dag,prod)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "custom_operator_dag" + } + ] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "['jdoe@example.com']", + "label": "'custom_task_id'", + "execution_timeout": "datetime.timedelta(seconds=300)", + "sla": "None", + "task_id": "'custom_task_id'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[]", + "outlets": "[]", + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": 
\"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/BaseFacet\", \"unknownItems\": [{\"name\": \"CustomOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"execution_timeout\": \"<>\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"mapped\": false, \"operator_class\": \"custom_operator.CustomOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"custom_task_id\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=custom_operator_dag&_flt_3_task_id=custom_task_id", + "name": "custom_task_id", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [], + "outputDatasets": [], + "inputDatajobs": [], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + 
"tags": [ + { + "tag": "urn:li:tag:example_tag" + } + ] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07a4aaeffa3875a24cccd1fec6fc7c8c", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "run_id": "manual_run_test", + "duration": "", + "start_date": "", + "end_date": "", + "execution_date": "2023-09-27 21:34:38+00:00", + "try_number": "0", + "max_tries": "0", + "external_executor_id": "None", + "state": "running", + "operator": "CustomOperator", + "priority_weight": "1", + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=custom_task_id&dag_id=custom_operator_dag&map_index=-1", + "orchestrator": "airflow", + "dag_id": "custom_operator_dag", + "task_id": "custom_task_id" + }, + "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=custom_task_id&dag_id=custom_operator_dag&map_index=-1", + "name": "custom_operator_dag_custom_task_id_manual_run_test", + "type": "BATCH_AD_HOC", + "created": { + "time": 1723716446564, + "actor": "urn:li:corpuser:datahub" + } + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07a4aaeffa3875a24cccd1fec6fc7c8c", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "upstreamInstances": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07a4aaeffa3875a24cccd1fec6fc7c8c", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1723716446564, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "status": "STARTED", + "attempt": 1 + } + } +}, +{ + "entityType": "dataJob", + 
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "['jdoe@example.com']", + "label": "'custom_task_id'", + "execution_timeout": "datetime.timedelta(seconds=300)", + "sla": "None", + "task_id": "'custom_task_id'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableB', env='PROD', platform_instance=None)]", + "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None)]", + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/BaseFacet\", \"unknownItems\": [{\"name\": \"CustomOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"execution_timeout\": \"<>\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"mapped\": false, \"operator_class\": \"custom_operator.CustomOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"custom_task_id\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=custom_operator_dag&_flt_3_task_id=custom_task_id", + "name": "custom_task_id", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": 
"urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake", + "name": "mydb.schema.tableA", + "origin": "PROD" + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake", + "name": "mydb.schema.tableB", + "origin": "PROD" + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake", + "name": "mydb.schema.tableD", + "origin": "PROD" + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": 
"urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:example_tag" + } + ] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07a4aaeffa3875a24cccd1fec6fc7c8c", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1723716446701, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "airflow" + } + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json index e7902d165051b1..4bc34b7b0d3ce5 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json @@ -350,8 +350,8 @@ "json": { "timestampMillis": 1717179743558, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED", "attempt": 1 @@ -367,8 +367,8 @@ "json": { "timestampMillis": 1717179743932, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "actor": "urn:li:corpuser:airflow", "operationType": "CREATE", @@ -552,8 +552,8 @@ "json": { "timestampMillis": 1717179743960, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -742,8 +742,8 @@ "json": { "timestampMillis": 1717179748679, "partitionSpec": { - 
"type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED", "attempt": 1 @@ -759,8 +759,8 @@ "json": { "timestampMillis": 1717179749258, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "actor": "urn:li:corpuser:airflow", "operationType": "CREATE", @@ -875,8 +875,8 @@ "json": { "timestampMillis": 1717179749324, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -1161,8 +1161,8 @@ "json": { "timestampMillis": 1717179757397, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED", "attempt": 1 @@ -1178,8 +1178,8 @@ "json": { "timestampMillis": 1717179758424, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "actor": "urn:li:corpuser:airflow", "operationType": "CREATE", @@ -1420,8 +1420,8 @@ "json": { "timestampMillis": 1717179758496, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -1483,10 +1483,10 @@ "aspectName": "dataJobInputOutput", "aspect": { "json": { - "inputDatasets": [ + "inputDatasets": [], + "outputDatasets": [ "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" ], - "outputDatasets": [], "inputDatajobs": [ "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)" ], @@ -1555,6 +1555,19 @@ } } }, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", 
+ "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ] + } + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)", @@ -1640,19 +1653,6 @@ } } }, -{ - "entityType": "dataProcessInstance", - "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a", - "changeType": "UPSERT", - "aspectName": "dataProcessInstanceInput", - "aspect": { - "json": { - "inputs": [ - "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" - ] - } - } -}, { "entityType": "dataProcessInstance", "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a", @@ -1662,8 +1662,8 @@ "json": { "timestampMillis": 1718733767964, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED", "attempt": 1 @@ -1679,8 +1679,8 @@ "json": { "timestampMillis": 1718733768638, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -1697,10 +1697,10 @@ "aspectName": "dataJobInputOutput", "aspect": { "json": { - "inputDatasets": [ + "inputDatasets": [], + "outputDatasets": [ "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)" ], - "outputDatasets": [], "inputDatajobs": [ "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)" ], @@ -1809,19 +1809,6 @@ } } }, -{ - "entityType": "dataProcessInstance", - "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372", - "changeType": "UPSERT", - "aspectName": "dataProcessInstanceInput", - "aspect": { - "json": { - "inputs": [ - "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)" - ] - } - } -}, { "entityType": "dataProcessInstance", "entityUrn": 
"urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372", @@ -1843,8 +1830,8 @@ "json": { "timestampMillis": 1718733773354, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED", "attempt": 1 @@ -1860,8 +1847,8 @@ "json": { "timestampMillis": 1718733774147, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -1870,5 +1857,18 @@ } } } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)" + ] + } + } } ] \ No newline at end of file diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json index a9af068e2e4e93..99bda0e0f2569a 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json @@ -336,8 +336,8 @@ "json": { "timestampMillis": 1717180072004, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED", "attempt": 1 @@ -382,8 +382,8 @@ "json": { "timestampMillis": 1719864194882, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "actor": "urn:li:corpuser:airflow", "operationType": "CREATE", @@ -435,8 +435,8 @@ "json": { 
"timestampMillis": 1717180072275, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -641,8 +641,8 @@ "json": { "timestampMillis": 1717180078196, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED", "attempt": 1 @@ -722,8 +722,8 @@ "json": { "timestampMillis": 1717180078619, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -1000,8 +1000,8 @@ "json": { "timestampMillis": 1717180084642, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED", "attempt": 1 @@ -1081,8 +1081,8 @@ "json": { "timestampMillis": 1717180085266, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -1186,10 +1186,10 @@ "aspectName": "dataJobInputOutput", "aspect": { "json": { - "inputDatasets": [ + "inputDatasets": [], + "outputDatasets": [ "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" ], - "outputDatasets": [], "inputDatajobs": [ "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)" ], @@ -1287,8 +1287,8 @@ "json": { "timestampMillis": 1717180091148, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED", "attempt": 1 @@ -1368,8 +1368,8 @@ "json": { "timestampMillis": 1717180091923, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": 
"COMPLETE", "result": { @@ -1499,10 +1499,10 @@ "aspectName": "dataJobInputOutput", "aspect": { "json": { - "inputDatasets": [ + "inputDatasets": [], + "outputDatasets": [ "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)" ], - "outputDatasets": [], "inputDatajobs": [ "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)" ], @@ -1613,8 +1613,8 @@ "json": { "timestampMillis": 1717180096108, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED", "attempt": 1 @@ -1630,8 +1630,8 @@ "json": { "timestampMillis": 1719864203487, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "actor": "urn:li:corpuser:airflow", "operationType": "CREATE", @@ -1712,8 +1712,8 @@ "json": { "timestampMillis": 1717180096993, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -1727,10 +1727,10 @@ "entityType": "dataProcessInstance", "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372", "changeType": "UPSERT", - "aspectName": "dataProcessInstanceInput", + "aspectName": "dataProcessInstanceOutput", "aspect": { "json": { - "inputs": [ + "outputs": [ "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)" ] } @@ -1740,10 +1740,10 @@ "entityType": "dataProcessInstance", "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a", "changeType": "UPSERT", - "aspectName": "dataProcessInstanceInput", + "aspectName": "dataProcessInstanceOutput", "aspect": { "json": { - "inputs": [ + "outputs": [ "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" ] } diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py 
b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py index 9ea822edeef81f..2b8d4c47f62246 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py @@ -110,7 +110,9 @@ def _wait_for_dag_finish( @contextlib.contextmanager def _run_airflow( - tmp_path: pathlib.Path, dags_folder: pathlib.Path, is_v1: bool + tmp_path: pathlib.Path, + dags_folder: pathlib.Path, + is_v1: bool, ) -> Iterator[AirflowInstance]: airflow_home = tmp_path / "airflow_home" print(f"Using airflow home: {airflow_home}") @@ -272,6 +274,7 @@ class DagTestCase: DagTestCase("basic_iolets"), DagTestCase("snowflake_operator", success=False, v2_only=True), DagTestCase("sqlite_operator", v2_only=True), + DagTestCase("custom_operator_dag", v2_only=True), ] diff --git a/metadata-ingestion-modules/airflow-plugin/tox.ini b/metadata-ingestion-modules/airflow-plugin/tox.ini index 4d66dbc860aa9f..9e0a30df6fcbde 100644 --- a/metadata-ingestion-modules/airflow-plugin/tox.ini +++ b/metadata-ingestion-modules/airflow-plugin/tox.ini @@ -23,7 +23,9 @@ deps = # Respect the Airflow constraints files. # We can't make ourselves work with the constraints of Airflow < 2.3. - py310-airflow24: -c https://raw.githubusercontent.com/apache/airflow/constraints-2.4.3/constraints-3.10.txt + # The Airflow 2.4 constraints file requires a version of the sqlite provider whose + # hook type is missing the `conn_name_attr` property. 
+ ; py310-airflow24: -c https://raw.githubusercontent.com/apache/airflow/constraints-2.4.3/constraints-3.10.txt py310-airflow26: -c https://raw.githubusercontent.com/apache/airflow/constraints-2.6.3/constraints-3.10.txt py310-airflow27: -c https://raw.githubusercontent.com/apache/airflow/constraints-2.7.3/constraints-3.10.txt py310-airflow28: -c https://raw.githubusercontent.com/apache/airflow/constraints-2.8.1/constraints-3.10.txt diff --git a/metadata-ingestion/docs/transformer/dataset_transformer.md b/metadata-ingestion/docs/transformer/dataset_transformer.md index ac6fefc3095741..03a224bcf7da47 100644 --- a/metadata-ingestion/docs/transformer/dataset_transformer.md +++ b/metadata-ingestion/docs/transformer/dataset_transformer.md @@ -1207,20 +1207,51 @@ The config, which we’d append to our ingestion recipe YAML, would look like th | Field | Required | Type | Default | Description | |---------------------------------------|----------|----------------------|-------------|---------------------------------------------------------------------------------------------| | `dataset_to_data_product_urns_pattern`| ✅ | map[regx, urn] | | Dataset Entity urn with regular expression and dataproduct urn apply to matching entity urn.| +| `is_container` | | bool | `false` | Whether to also consider a container or not. If true, the data product will be attached to both the dataset and its container. | -Let’s suppose we’d like to append a series of dataproducts with specific datasets as its assets. To do so, we can use the `pattern_add_dataset_dataproduct` module that’s included in the ingestion framework. This will match the regex pattern to `urn` of the dataset and create the data product entity with given urn and matched datasets as its assets. + +Let’s suppose we’d like to append a series of data products with specific datasets or their containers as assets. To do so, we can use the pattern_add_dataset_dataproduct module that’s included in the ingestion framework. 
This module matches a regex pattern to the urn of the dataset and creates a data product entity with the given urn, associating the matched datasets as its assets. + +If the is_container field is set to true, the module will not only attach the data product to the matching datasets but will also find and attach the containers associated with those datasets. This means that both the datasets and their containers will be associated with the specified data product. The config, which we’d append to our ingestion recipe YAML, would look like this: +- Add Product to dataset + ```yaml + transformers: + - type: "pattern_add_dataset_dataproduct" + config: + dataset_to_data_product_urns_pattern: + rules: + ".*example1.*": "urn:li:dataProduct:first" + ".*example2.*": "urn:li:dataProduct:second" + ``` +- Add Product to dataset container ```yaml transformers: - type: "pattern_add_dataset_dataproduct" config: + is_container: true dataset_to_data_product_urns_pattern: rules: ".*example1.*": "urn:li:dataProduct:first" ".*example2.*": "urn:li:dataProduct:second" ``` +⚠️ Warning: +When working with two datasets in the same container but with different data products, only one data product can be attached to the container. + +For example: +```yaml +transformers: + - type: "pattern_add_dataset_dataproduct" + config: + is_container: true + dataset_to_data_product_urns_pattern: + rules: + ".*example1.*": "urn:li:dataProduct:first" + ".*example2.*": "urn:li:dataProduct:second" +``` +If example1 and example2 are in the same container, only urn:li:dataProduct:first will be added. However, if they are in separate containers, the system works as expected and assigns the correct data product URNs. 
## Add Dataset dataProduct ### Config Details diff --git a/metadata-ingestion/examples/mce_files/bootstrap_mce.json b/metadata-ingestion/examples/mce_files/bootstrap_mce.json index fbe6b9953cb4fa..bc218e5e8c2d53 100644 --- a/metadata-ingestion/examples/mce_files/bootstrap_mce.json +++ b/metadata-ingestion/examples/mce_files/bootstrap_mce.json @@ -3394,7 +3394,7 @@ "changeType":"UPSERT", "aspectName":"datasetProfile", "aspect":{ - "value":"{\"timestampMillis\": 1679515693000, \"rowCount\": 4500, \"columnCount\": 2, \"sizeInBytes\": 842000200000, \"fieldProfiles\": [{\"fieldPath\": \"field_foo\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00044, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"true\", \"false\"]}, {\"fieldPath\": \"field_bar\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00044, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"false\"]}]}", + "value":"{\"timestampMillis\": 1723488954865, \"rowCount\": 4500, \"columnCount\": 2, \"sizeInBytes\": 842000200000, \"fieldProfiles\": [{\"fieldPath\": \"field_foo\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00044, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"true\", \"false\"]}, {\"fieldPath\": \"field_bar\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00044, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"false\"]}]}", "contentType":"application/json" }, "systemMetadata":null @@ -3406,7 +3406,7 @@ "changeType":"UPSERT", "aspectName":"datasetProfile", "aspect":{ - "value":"{\"timestampMillis\": 1684786093000, \"rowCount\": 3500, \"columnCount\": 2, \"fieldProfiles\": [{\"fieldPath\": \"field_foo\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00057, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"true\", \"false\"]}, {\"fieldPath\": \"field_bar\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00057, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"true\"]}]}", + "value":"{\"timestampMillis\": 1723488954865, 
\"rowCount\": 3500, \"columnCount\": 2, \"fieldProfiles\": [{\"fieldPath\": \"field_foo\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00057, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"true\", \"false\"]}, {\"fieldPath\": \"field_bar\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00057, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"true\"]}]}", "contentType":"application/json" }, "systemMetadata":null diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 445600b8abd48b..7fb83fb6a83253 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -99,11 +99,13 @@ sqlglot_lib = { # Using an Acryl fork of sqlglot. # https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:main?expand=1 - "acryl-sqlglot[rs]==25.3.1.dev3", + "acryl-sqlglot[rs]==25.8.2.dev9", } classification_lib = { "acryl-datahub-classify==0.0.11", + # schwifty is needed for the classify plugin but in 2024.08.0 they broke the python 3.8 compatibility + "schwifty<2024.08.0", # This is a bit of a hack. Because we download the SpaCy model at runtime in the classify plugin, # we need pip to be available. 
"pip", @@ -171,6 +173,7 @@ *sqlglot_lib, "GitPython>2", "python-liquid", + "deepmerge>=1.1.1" } bigquery_common = { @@ -295,7 +298,7 @@ databricks = { # 0.1.11 appears to have authentication issues with azure databricks - "databricks-sdk>=0.9.0", + "databricks-sdk>=0.30.0", "pyspark~=3.3.0", "requests", # Version 2.4.0 includes sqlalchemy dialect, 2.8.0 includes some bug fixes diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py index a4de8b382430c7..3dea3d36f41f17 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source.py @@ -47,7 +47,7 @@ logger = logging.getLogger(__name__) -_MAX_CONTEXT_STRING_LENGTH = 300 +_MAX_CONTEXT_STRING_LENGTH = 1000 class SourceCapability(Enum): diff --git a/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py b/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py index a9f788acf66d38..6d7105bd264416 100644 --- a/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py +++ b/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py @@ -202,6 +202,8 @@ def _write_done_callback( record_urn = _get_urn(record_envelope) if record_urn: e.info["urn"] = record_urn + if workunit_id := record_envelope.metadata.get("workunit_id"): + e.info["workunit_id"] = workunit_id if not self.treat_errors_as_warnings: self.report.report_failure({"error": e.message, "info": e.info}) diff --git a/metadata-ingestion/src/datahub/ingestion/source/abs/source.py b/metadata-ingestion/src/datahub/ingestion/source/abs/source.py index 39ebd79c2e2269..66f268799b2f1f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/abs/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/abs/source.py @@ -8,29 +8,10 @@ from collections import OrderedDict from datetime import datetime from pathlib import PurePath -from typing import Any, Dict, Iterable, List, Optional, Tuple +from typing import Dict, 
Iterable, List, Optional, Tuple import smart_open.compression as so_compression from more_itertools import peekable -from pyspark.sql.types import ( - ArrayType, - BinaryType, - BooleanType, - ByteType, - DateType, - DecimalType, - DoubleType, - FloatType, - IntegerType, - LongType, - MapType, - NullType, - ShortType, - StringType, - StructField, - StructType, - TimestampType, -) from smart_open import open as smart_open from datahub.emitter.mce_builder import ( @@ -48,7 +29,7 @@ platform_name, support_status, ) -from datahub.ingestion.api.source import MetadataWorkUnitProcessor, SourceReport +from datahub.ingestion.api.source import MetadataWorkUnitProcessor from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.abs.config import DataLakeSourceConfig, PathSpec from datahub.ingestion.source.abs.report import DataLakeSourceReport @@ -72,22 +53,14 @@ StatefulIngestionSourceBase, ) from datahub.metadata.com.linkedin.pegasus2avro.schema import ( - BooleanTypeClass, - BytesTypeClass, - DateTypeClass, - NullTypeClass, - NumberTypeClass, - RecordTypeClass, SchemaField, SchemaFieldDataType, SchemaMetadata, StringTypeClass, - TimeTypeClass, ) from datahub.metadata.schema_classes import ( DataPlatformInstanceClass, DatasetPropertiesClass, - MapTypeClass, OperationClass, OperationTypeClass, OtherSchemaClass, @@ -100,55 +73,12 @@ logging.getLogger("py4j").setLevel(logging.ERROR) logger: logging.Logger = logging.getLogger(__name__) -# for a list of all types, see https://spark.apache.org/docs/3.0.3/api/python/_modules/pyspark/sql/types.html -_field_type_mapping = { - NullType: NullTypeClass, - StringType: StringTypeClass, - BinaryType: BytesTypeClass, - BooleanType: BooleanTypeClass, - DateType: DateTypeClass, - TimestampType: TimeTypeClass, - DecimalType: NumberTypeClass, - DoubleType: NumberTypeClass, - FloatType: NumberTypeClass, - ByteType: BytesTypeClass, - IntegerType: NumberTypeClass, - LongType: NumberTypeClass, - ShortType: 
NumberTypeClass, - ArrayType: NullTypeClass, - MapType: MapTypeClass, - StructField: RecordTypeClass, - StructType: RecordTypeClass, -} PAGE_SIZE = 1000 # Hack to support the .gzip extension with smart_open. so_compression.register_compressor(".gzip", so_compression._COMPRESSOR_REGISTRY[".gz"]) -def get_column_type( - report: SourceReport, dataset_name: str, column_type: str -) -> SchemaFieldDataType: - """ - Maps known Spark types to datahub types - """ - TypeClass: Any = None - - for field_type, type_class in _field_type_mapping.items(): - if isinstance(column_type, field_type): - TypeClass = type_class - break - - # if still not found, report the warning - if TypeClass is None: - report.report_warning( - dataset_name, f"unable to map type {column_type} to metadata schema" - ) - TypeClass = NullTypeClass - - return SchemaFieldDataType(type=TypeClass()) - - # config flags to emit telemetry for config_options_to_report = [ "platform", diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py index 46ec75edb9734b..c6a50a1c977f4e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py @@ -985,7 +985,7 @@ def get_tables_for_dataset( # https://cloud.google.com/bigquery/docs/information-schema-partitions max_batch_size: int = ( self.config.number_of_datasets_process_in_batch - if not self.config.is_profiling_enabled() + if not self.config.have_table_data_read_permission else self.config.number_of_datasets_process_in_batch_if_profiling_enabled ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index ead86acc299ca0..8d67551b9e1f2f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ 
b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -276,6 +276,12 @@ class DBTCommonConfig( DBTEntitiesEnabled(), description="Controls for enabling / disabling metadata emission for different dbt entities (models, test definitions, test results, etc.)", ) + prefer_sql_parser_lineage: bool = Field( + default=False, + description="Normally we use dbt's metadata to generate table lineage. When enabled, we prefer results from the SQL parser when generating lineage instead. " + "This can be useful when dbt models reference tables directly, instead of using the ref() macro. " + "This requires that `skip_sources_in_lineage` is enabled.", + ) skip_sources_in_lineage: bool = Field( default=False, description="[Experimental] When enabled, dbt sources will not be included in the lineage graph. " @@ -366,13 +372,6 @@ class DBTCommonConfig( description="When enabled, includes the compiled code in the emitted metadata.", ) - prefer_sql_parser_lineage: bool = Field( - default=False, - description="Normally we use dbt's metadata to generate table lineage. When enabled, we prefer results from the SQL parser when generating lineage instead. " - "This can be useful when dbt models reference tables directly, instead of using the ref() macro. 
" - "This requires that `skip_sources_in_lineage` is enabled.", - ) - @validator("target_platform") def validate_target_platform_value(cls, target_platform: str) -> str: if target_platform.lower() == DBT_PLATFORM: @@ -438,15 +437,27 @@ def validate_include_column_lineage( return include_column_lineage - @validator("skip_sources_in_lineage") + @validator("skip_sources_in_lineage", always=True) def validate_skip_sources_in_lineage( cls, skip_sources_in_lineage: bool, values: Dict ) -> bool: - entites_enabled: Optional[DBTEntitiesEnabled] = values.get("entities_enabled") + entities_enabled: Optional[DBTEntitiesEnabled] = values.get("entities_enabled") + prefer_sql_parser_lineage: Optional[bool] = values.get( + "prefer_sql_parser_lineage" + ) + + if prefer_sql_parser_lineage and not skip_sources_in_lineage: + raise ValueError( + "`prefer_sql_parser_lineage` requires that `skip_sources_in_lineage` is enabled." + ) + if ( skip_sources_in_lineage - and entites_enabled - and entites_enabled.sources == EmitDirective.YES + and entities_enabled + and entities_enabled.sources == EmitDirective.YES + # When `prefer_sql_parser_lineage` is enabled, it's ok to have `skip_sources_in_lineage` enabled + # without also disabling sources. + and not prefer_sql_parser_lineage ): raise ValueError( "When `skip_sources_in_lineage` is enabled, `entities_enabled.sources` must be set to NO." @@ -454,16 +465,6 @@ def validate_skip_sources_in_lineage( return skip_sources_in_lineage - @validator("prefer_sql_parser_lineage") - def validate_prefer_sql_parser_lineage( - cls, prefer_sql_parser_lineage: bool, values: Dict - ) -> bool: - if prefer_sql_parser_lineage and not values.get("skip_sources_in_lineage"): - raise ValueError( - "`prefer_sql_parser_lineage` requires that `skip_sources_in_lineage` is enabled." 
- ) - return prefer_sql_parser_lineage - @dataclass class DBTColumn: @@ -769,23 +770,30 @@ def make_mapping_upstream_lineage( downstream_urn: str, node: DBTNode, convert_column_urns_to_lowercase: bool, + skip_sources_in_lineage: bool, ) -> UpstreamLineageClass: cll = [] - for column in node.columns or []: - field_name = column.name - if convert_column_urns_to_lowercase: - field_name = field_name.lower() - - cll.append( - FineGrainedLineage( - upstreamType=FineGrainedLineageUpstreamType.FIELD_SET, - upstreams=[mce_builder.make_schema_field_urn(upstream_urn, field_name)], - downstreamType=FineGrainedLineageDownstreamType.FIELD, - downstreams=[ - mce_builder.make_schema_field_urn(downstream_urn, field_name) - ], + if not (node.node_type == "source" and skip_sources_in_lineage): + # If `skip_sources_in_lineage` is enabled, we want to generate table lineage (for siblings) + # but not CLL. That's because CLL will make it look like the warehouse node has downstream + # column lineage, but it's really just empty. + for column in node.columns or []: + field_name = column.name + if convert_column_urns_to_lowercase: + field_name = field_name.lower() + + cll.append( + FineGrainedLineage( + upstreamType=FineGrainedLineageUpstreamType.FIELD_SET, + upstreams=[ + mce_builder.make_schema_field_urn(upstream_urn, field_name) + ], + downstreamType=FineGrainedLineageDownstreamType.FIELD, + downstreams=[ + mce_builder.make_schema_field_urn(downstream_urn, field_name) + ], + ) ) - ) return UpstreamLineageClass( upstreams=[ @@ -849,8 +857,11 @@ def get_column_type( # if still not found, report the warning if TypeClass is None: if column_type: - report.report_warning( - dataset_name, f"unable to map type {column_type} to metadata schema" + report.info( + title="Unable to map column types to DataHub types", + message="Got an unexpected column type. 
The column's parsed field type will not be populated.", + context=f"{dataset_name} - {column_type}", + log=False, ) TypeClass = NullTypeClass @@ -1473,6 +1484,7 @@ def create_target_platform_mces( downstream_urn=node_datahub_urn, node=node, convert_column_urns_to_lowercase=self.config.convert_column_urns_to_lowercase, + skip_sources_in_lineage=self.config.skip_sources_in_lineage, ) if self.config.incremental_lineage: # We only generate incremental lineage for non-dbt nodes. @@ -1818,6 +1830,7 @@ def _create_lineage_aspect_for_dbt_node( downstream_urn=node_urn, node=node, convert_column_urns_to_lowercase=self.config.convert_column_urns_to_lowercase, + skip_sources_in_lineage=self.config.skip_sources_in_lineage, ) else: upstream_urns = get_upstreams( diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_constant.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_constant.py index 21160cc97d4a62..5f47d361abb37c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_constant.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_constant.py @@ -1 +1,10 @@ IMPORTED_PROJECTS = "imported_projects" +SQL_TABLE_NAME = "sql_table_name" +DATAHUB_TRANSFORMED_SQL_TABLE_NAME = "datahub_transformed_sql_table_name" +DERIVED_TABLE = "derived_table" +SQL = "sql" +DATAHUB_TRANSFORMED_SQL = "datahub_transformed_sql" +prod = "prod" +dev = "dev" +NAME = "name" +DERIVED_DOT_SQL = "derived.sql" diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py index fd670c23ad9cb0..52ebcdde06a279 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py @@ -1,17 +1,18 @@ import logging import pathlib from dataclasses import replace -from typing import Any, Dict, Optional +from typing import Dict, Optional 
from datahub.ingestion.source.looker.lkml_patched import load_lkml from datahub.ingestion.source.looker.looker_config import LookerConnectionDefinition from datahub.ingestion.source.looker.looker_dataclasses import LookerViewFile from datahub.ingestion.source.looker.looker_template_language import ( - resolve_liquid_variable_in_view_dict, + process_lookml_template_language, ) from datahub.ingestion.source.looker.lookml_config import ( _EXPLORE_FILE_EXTENSION, _VIEW_FILE_EXTENSION, + LookMLSourceConfig, LookMLSourceReport, ) @@ -29,13 +30,13 @@ def __init__( root_project_name: Optional[str], base_projects_folder: Dict[str, pathlib.Path], reporter: LookMLSourceReport, - liquid_variable: Dict[Any, Any], + source_config: LookMLSourceConfig, ) -> None: self.viewfile_cache: Dict[str, Optional[LookerViewFile]] = {} self._root_project_name = root_project_name self._base_projects_folder = base_projects_folder self.reporter = reporter - self.liquid_variable = liquid_variable + self.source_config = source_config def _load_viewfile( self, project_name: str, path: str, reporter: LookMLSourceReport @@ -73,9 +74,9 @@ def _load_viewfile( parsed = load_lkml(path) - resolve_liquid_variable_in_view_dict( - raw_view=parsed, - liquid_variable=self.liquid_variable, + process_lookml_template_language( + view_lkml_file_dict=parsed, + source_config=self.source_config, ) looker_viewfile = LookerViewFile.from_looker_dict( diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index d61458d8e924aa..ef329da930dda4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -96,13 +96,11 @@ ChartTypeClass, ContainerClass, DashboardInfoClass, - DataPlatformInfoClass, InputFieldClass, InputFieldsClass, OwnerClass, OwnershipClass, OwnershipTypeClass, - PlatformTypeClass, SubTypesClass, ) 
from datahub.utilities.backpressure_aware_executor import BackpressureAwareExecutor @@ -1573,25 +1571,6 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: looker_dashboards_for_usage: List[looker_usage.LookerDashboardForUsage] = [] - # Emit platform instance entity - if self.source_config.platform_instance: - platform_instance_urn = builder.make_dataplatform_instance_urn( - platform=self.source_config.platform_name, - instance=self.source_config.platform_instance, - ) - - yield MetadataWorkUnit( - id=f"{platform_instance_urn}-aspect-dataplatformInfo", - mcp=MetadataChangeProposalWrapper( - entityUrn=platform_instance_urn, - aspect=DataPlatformInfoClass( - name=self.source_config.platform_instance, - type=PlatformTypeClass.OTHERS, - datasetNameDelimiter=".", - ), - ), - ) - with self.reporter.report_stage("dashboard_chart_metadata"): for job in BackpressureAwareExecutor.map( self.process_dashboard, diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py index 2c523fcd98d08c..04f9ec081ee680 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py @@ -1,17 +1,30 @@ import logging import re -from typing import Any, ClassVar, Dict, Set +from abc import ABC, abstractmethod +from typing import Any, ClassVar, Dict, List, Optional, Set +from deepmerge import always_merger from liquid import Undefined from liquid.exceptions import LiquidSyntaxError +from datahub.ingestion.source.looker.looker_constant import ( + DATAHUB_TRANSFORMED_SQL, + DATAHUB_TRANSFORMED_SQL_TABLE_NAME, + DERIVED_DOT_SQL, + DERIVED_TABLE, + NAME, + SQL, + SQL_TABLE_NAME, + dev, + prod, +) from datahub.ingestion.source.looker.looker_liquid_tag import ( CustomTagException, create_template, ) -from datahub.ingestion.source.looker.lookml_config 
import DERIVED_VIEW_PATTERN -from datahub.ingestion.source.looker.str_functions import ( - remove_extra_spaces_and_newlines, +from datahub.ingestion.source.looker.lookml_config import ( + DERIVED_VIEW_PATTERN, + LookMLSourceConfig, ) logger = logging.getLogger(__name__) @@ -95,44 +108,311 @@ def resolve_liquid_variable(text: str, liquid_variable: Dict[Any, Any]) -> str: return text -def _complete_incomplete_sql(raw_view: dict, sql: str) -> str: +class LookMLViewTransformer(ABC): + """ + There are many transformations that we need to perform on the LookML view to make it suitable for metadata ingestion. + + These transformations include: + + 1. Evaluating Looker templates, such as `-- if prod --` comments. Example `LookMlIfCommentTransformer`. + + 2. Resolving Liquid templates. Example `LiquidVariableTransformer`. + + 3. Removing ${} from derived view patterns. Example `DropDerivedViewPatternTransformer`. + (e.g., changing ${view_name.SQL_TABLE_NAME} to view_name.SQL_TABLE_NAME). + + 4. Completing incomplete SQL fragments. Example `IncompleteSqlTransformer`. + + Each transformer works on specific attributes of the LookML view. For example, the #4 transformation is only + applicable to the view.derived.sql attribute, while the other transformations apply to both the + view.sql_table_name and view.derived.sql attributes. + + This class contains the logic to ensure that the transformer is applied to specific attributes and returns a + dictionary containing the transformed data.
+ + For example: + In case of #1 and #2, it returns: + + **transformed derived_table:** + ``` + { + "derived_table": { + "datahub_transformed_sql": "" + } + } + ``` + + **Whereas original was:** + ``` + { + "derived_table": { + "sql": "" + } + } + ``` + + In case #3, it returns: + **transformed sql_table_name:** + ``` + { + "datahub_transformed_sql_table_name": "employee_income_source.SQL_TABLE_NAME" + } + ``` + + **Whereas original was:** + ``` + { + "sql_table_name": "${employee_income_source.SQL_TABLE_NAME}" + } + ``` + + In case #4, it returns: + **transformed derived_table:** + ``` + { + "derived_table": { + "datahub_transformed_sql": "SELECT column_a, column_b FROM foo" + } + } + ``` + + **Whereas original was:** + ``` + { + "derived_table": { + "sql": "column_a, column_b" + } + } + ``` + + Each transformation generates a section of the transformed dictionary with a new attribute named after the + original attribute, prefixed with `datahub_transformed_`. + """ + + source_config: LookMLSourceConfig + + def __init__(self, source_config: LookMLSourceConfig): + self.source_config = source_config + + def transform(self, view: dict) -> dict: + value_to_transform: Optional[str] = None + + # is_attribute_supported check is required because not every transformer works on every attribute. Currently + # most transformers work on both the sql_table_name and derived.sql attributes, + # however IncompleteSqlTransformer only transforms the derived.sql attribute + if SQL_TABLE_NAME in view and self.is_attribute_supported(SQL_TABLE_NAME): + # Give precedence to already processed transformed view.sql_table_name to apply more transformation + value_to_transform = view.get( + DATAHUB_TRANSFORMED_SQL_TABLE_NAME, view[SQL_TABLE_NAME] + ) + + if ( + DERIVED_TABLE in view + and SQL in view[DERIVED_TABLE] + and self.is_attribute_supported(DERIVED_DOT_SQL) + ): + # Give precedence to already processed transformed view.derived.sql to apply more transformation + value_to_transform = view[DERIVED_TABLE].get( +
DATAHUB_TRANSFORMED_SQL, view[DERIVED_TABLE][SQL] + ) + + if value_to_transform is None: + return {} + + logger.debug(f"value to transform = {value_to_transform}") + + transformed_value: str = self._apply_transformation( + value=value_to_transform, view=view + ) + + logger.debug(f"transformed value = {transformed_value}") + + if SQL_TABLE_NAME in view and value_to_transform: + return {DATAHUB_TRANSFORMED_SQL_TABLE_NAME: transformed_value} + + if DERIVED_TABLE in view and SQL in view[DERIVED_TABLE] and value_to_transform: + return {DERIVED_TABLE: {DATAHUB_TRANSFORMED_SQL: transformed_value}} + + return {} + + @abstractmethod + def _apply_transformation(self, value: str, view: dict) -> str: + pass + + def is_attribute_supported(self, attribute: str) -> bool: + return attribute in [DERIVED_DOT_SQL, SQL_TABLE_NAME] + + +class LiquidVariableTransformer(LookMLViewTransformer): + """ + Replace the liquid variables with their values. + """ + + def _apply_transformation(self, value: str, view: dict) -> str: + return resolve_liquid_variable( + text=value, + liquid_variable=self.source_config.liquid_variable, + ) + + +class IncompleteSqlTransformer(LookMLViewTransformer): + """ + lookml view may contain the fragment of sql, however for lineage generation we need a complete sql. + IncompleteSqlTransformer will complete the view's derived.sql. 
+ """ + + def is_attribute_supported(self, attribute: str) -> bool: + return attribute in [DERIVED_DOT_SQL] + + def _apply_transformation(self, value: str, view: dict) -> str: + if DERIVED_TABLE not in view or SQL not in view[DERIVED_TABLE]: + # This transformation is only applicable when the view contains view.derived.sql + return value + + # Looker supports sql fragments that omit the SELECT and FROM parts of the query + # Add those in if we detect that it is missing + sql_query: str = value + + if not re.search(r"SELECT\s", sql_query, flags=re.I): + # add a SELECT clause at the beginning + sql_query = f"SELECT {sql_query}" + + if not re.search(r"FROM\s", sql_query, flags=re.I): + # add a FROM clause at the end + sql_query = f"{sql_query} FROM {view[NAME]}" + + return sql_query + + +class DropDerivedViewPatternTransformer(LookMLViewTransformer): + """ + drop ${} from datahub_transformed_sql_table_name and view["derived_table"]["datahub_transformed_sql"] values. + + Example: transform ${employee_income_source.SQL_TABLE_NAME} to employee_income_source.SQL_TABLE_NAME + """ + + def _apply_transformation(self, value: str, view: dict) -> str: + return re.sub( + DERIVED_VIEW_PATTERN, + r"\1", + value, + ) + + +class LookMlIfCommentTransformer(LookMLViewTransformer): + """ + Evaluate the looker -- if -- comments. + """ + + evaluate_to_true_regx: str + remove_if_comment_line_regx: str + + def __init__(self, source_config: LookMLSourceConfig): + super().__init__(source_config=source_config) + + # This regx will keep whatever after -- if looker_environment -- + self.evaluate_to_true_regx = r"-- if {} --".format( + self.source_config.looker_environment + ) + + # It will remove all other lines starts with -- if ...
-- + self.remove_if_comment_line_regx = r"-- if {} --.*?(?=\n|-- if|$)".format( + dev if self.source_config.looker_environment.lower() == prod else prod + ) + + def _apply_regx(self, value: str) -> str: + result: str = re.sub( + self.remove_if_comment_line_regx, "", value, flags=re.IGNORECASE | re.DOTALL + ) + + # Remove '-- if prod --' but keep the rest of the line + result = re.sub(self.evaluate_to_true_regx, "", result, flags=re.IGNORECASE) + + return result + + def _apply_transformation(self, value: str, view: dict) -> str: + return self._apply_regx(value) - # Looker supports sql fragments that omit the SELECT and FROM parts of the query - # Add those in if we detect that it is missing - sql_query: str = sql - if not re.search(r"SELECT\s", sql_query, flags=re.I): - # add a SELECT clause at the beginning - sql_query = f"SELECT {sql}" +class TransformedLookMlView: + """ + TransformedLookMlView is collecting output of LookMLViewTransformer and creating a new transformed LookML view. + TransformedLookMlView creates a copy of the original view dictionary and updates the copy with the transformed output. + The deepmerge library is used because Python's dict.update function doesn't merge nested fields. - if not re.search(r"FROM\s", sql_query, flags=re.I): - # add a FROM clause at the end - sql_query = f"{sql_query} FROM {raw_view['name']}" + The transformed LookML view will contain the following attributes: - # Drop ${ and } - return re.sub(DERIVED_VIEW_PATTERN, r"\1", sql_query) + ``` + { + "derived_table": { + "sql": "" + }, + dimensions ..... + } + ``` + see documentation of LookMLViewTransformer for output of each transformer. 
+ """ -def resolve_liquid_variable_in_view_dict( - raw_view: dict, liquid_variable: Dict[Any, Any] + transformers: List[LookMLViewTransformer] + view_dict: dict + transformed_dict: dict + + def __init__( + self, + transformers: List[LookMLViewTransformer], + view_dict: dict, + ): + self.transformers = transformers + self.view_dict = view_dict + self.transformed_dict = {} + + def view(self) -> dict: + if self.transformed_dict: + return self.transformed_dict + + self.transformed_dict = {**self.view_dict} + + logger.debug(f"Processing view {self.view_dict[NAME]}") + + for transformer in self.transformers: + logger.debug(f"Applying transformer {transformer.__class__.__name__}") + + self.transformed_dict = always_merger.merge( + self.transformed_dict, transformer.transform(self.transformed_dict) + ) + + return self.transformed_dict + + +def process_lookml_template_language( + source_config: LookMLSourceConfig, + view_lkml_file_dict: dict, ) -> None: - if "views" not in raw_view: + if "views" not in view_lkml_file_dict: return - for view in raw_view["views"]: - if "sql_table_name" in view: - view["datahub_transformed_sql_table_name"] = resolve_liquid_variable( - text=remove_extra_spaces_and_newlines(view["sql_table_name"]), - liquid_variable=liquid_variable, - ) # keeping original sql_table_name as is to avoid any visualization issue later - - if "derived_table" in view and "sql" in view["derived_table"]: - # In sql we don't need to remove the extra spaces as sql parser takes care of extra spaces and \n - # while generating URN from sql - view["derived_table"]["datahub_transformed_sql"] = resolve_liquid_variable( - text=view["derived_table"]["sql"], liquid_variable=liquid_variable - ) # keeping original sql as is, so that on UI sql will be shown same is it is visible on looker portal - - view["derived_table"]["datahub_transformed_sql"] = _complete_incomplete_sql( - raw_view=view, sql=view["derived_table"]["datahub_transformed_sql"] - ) + transformers: 
List[LookMLViewTransformer] = [ + LookMlIfCommentTransformer( + source_config=source_config + ), # First evaluate the -- if -- comments. Looker does the same + LiquidVariableTransformer( + source_config=source_config + ), # Now resolve liquid variables + DropDerivedViewPatternTransformer( + source_config=source_config + ), # Remove any ${} symbol + IncompleteSqlTransformer( + source_config=source_config + ), # complete any incomplete sql + ] + + transformed_views: List[dict] = [] + + for view in view_lkml_file_dict["views"]: + transformed_views.append( + TransformedLookMlView(transformers=transformers, view_dict=view).view() + ) + + view_lkml_file_dict["views"] = transformed_views diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py index a83aa2638ec964..69b9f842ac14db 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py @@ -11,12 +11,14 @@ from datahub.ingestion.source.looker.looker_dataclasses import LookerViewFile from datahub.ingestion.source.looker.looker_file_loader import LookerViewFileLoader from datahub.ingestion.source.looker.lookml_config import ( - DERIVED_VIEW_PATTERN, DERIVED_VIEW_SUFFIX, NAME, LookMLSourceReport, ) from datahub.ingestion.source.looker.lookml_refinement import LookerRefinementResolver +from datahub.ingestion.source.looker.str_functions import ( + remove_extra_spaces_and_newlines, +) logger = logging.getLogger(__name__) @@ -56,7 +58,7 @@ def column_name_in_sql_attribute(self) -> List[str]: class LookerViewContext: """ - There are six patterns to associate the view's fields with dataset + There are seven patterns to associate the view's fields with dataset Pattern1: view: view_name { @@ -161,6 +163,36 @@ class LookerViewContext: For all possible options of "sql" attribute please refer 
looker doc: https://cloud.google.com/looker/docs/reference/param-field-sql + For pattern 6 i.e. view.derived.sql, The looker creates a temporary table to store the sql result, + However if we don't want to have a temporary table and want looker to always execute the sql to fetch the result then + in that case pattern 7 is useful (mentioned below). + + Pattern7: + view: customer_sales { + sql_table_name: ( + SELECT + customer_id, + SUM(sales_amount) AS total_sales + FROM + sales + GROUP BY + customer_id + ) ;; + + dimension: customer_id { + sql: ${TABLE}.customer_id ;; + } + + measure: total_sales { + type: sum + sql: ${TABLE}.total_sales ;; + } + } + + + In Pattern7 the fields' upstream dataset is the output of sql mentioned in + customer_sales.sql_table_name. + """ raw_view: Dict @@ -252,6 +284,7 @@ def _get_sql_table_name_field(self) -> Optional[str]: return self.get_including_extends(field="sql_table_name") def _is_dot_sql_table_name_present(self) -> bool: + sql_table_name: Optional[str] = self._get_sql_table_name_field() if sql_table_name is None: @@ -268,7 +301,7 @@ def sql_table_name(self) -> str: if sql_table_name is None: sql_table_name = self.raw_view[NAME].lower() - return sql_table_name + return sql_table_name.lower() def datahub_transformed_sql_table_name(self) -> str: table_name: Optional[str] = self.raw_view.get( @@ -278,13 +311,13 @@ def datahub_transformed_sql_table_name(self) -> str: if not table_name: table_name = self.sql_table_name() - # sql_table_name is in the format "${view-name}.SQL_TABLE_NAME" - # remove extra characters - if self._is_dot_sql_table_name_present(): - table_name = re.sub(DERIVED_VIEW_PATTERN, r"\1", table_name) + # remove extra spaces and new lines from sql_table_name if it is not a sql + if not self.is_direct_sql_query_case(): + # Some sql_table_name fields contain quotes like: optimizely."group", just remove the quotes + table_name = table_name.replace('"', "").replace("`", "").lower() + table_name = 
remove_extra_spaces_and_newlines(table_name).strip() - # Some sql_table_name fields contain quotes like: optimizely."group", just remove the quotes - return table_name.replace('"', "").replace("`", "").lower() + return table_name def derived_table(self) -> Dict[Any, Any]: """ @@ -371,6 +404,11 @@ def is_materialized_derived_view(self) -> bool: def is_regular_case(self) -> bool: # regular-case is pattern1 and 2 where upstream table is either view-name or # table name mentioned in sql_table_name attribute + + # It should not be the sql query + if self.is_direct_sql_query_case(): + return False + if ( self.is_sql_table_name_referring_to_view() or self.is_sql_based_derived_case() @@ -381,6 +419,9 @@ def is_regular_case(self) -> bool: return True def is_sql_table_name_referring_to_view(self) -> bool: + if self.is_direct_sql_query_case(): + return False + # It is pattern3 return self._is_dot_sql_table_name_present() @@ -413,3 +454,14 @@ def is_sql_based_derived_view_without_fields_case(self) -> bool: return True return False + + def is_direct_sql_query_case(self) -> bool: + # pattern 7 + # sqlglot doesn't have a function to validate whether text is valid SQL or not. + # Applying a simple logic to check if sql_table_name contains a sql. 
+ # if sql_table_name contains sql then its value starts with "(" and checking if "select" is present inside the + # text + return ( + self.sql_table_name().strip().startswith("(") + and "select" in self.sql_table_name() + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py index f4fb1316b16a20..0bcee14ec77a1a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py @@ -1,7 +1,7 @@ import logging from dataclasses import dataclass, field as dataclass_field from datetime import timedelta -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Literal, Optional, Union import pydantic from pydantic import root_validator, validator @@ -174,6 +174,13 @@ class LookMLSourceConfig( "view.sql_table_name. Defaults to an empty dictionary.", ) + looker_environment: Literal["prod", "dev"] = Field( + "prod", + description="A looker prod or dev environment. " + "It helps to evaluate looker if comments i.e. -- if prod --. " + "All if comments are evaluated to true for configured looker_environment value", + ) + @validator("connection_to_platform_map", pre=True) def convert_string_to_connection_def(cls, conn_map): # Previous version of config supported strings in connection map.
This upconverts strings to ConnectionMap diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py index d77e65ac733232..b00291caabbf68 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py @@ -669,7 +669,7 @@ def get_internal_workunits(self) -> Iterable[MetadataWorkUnit]: # noqa: C901 self.source_config.project_name, self.base_projects_folder, self.reporter, - self.source_config.liquid_variable, + self.source_config, ) # Some views can be mentioned by multiple 'include' statements and can be included via different connections. diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py b/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py index 98646e19a7014b..d5929b52aea3a3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py @@ -237,7 +237,7 @@ def create_fields(self) -> List[ViewField]: return [] # it is for the special case -class SqlBasedDerivedViewUpstream(AbstractViewUpstream): +class SqlBasedDerivedViewUpstream(AbstractViewUpstream, ABC): """ Handle the case where upstream dataset is defined in derived_table.sql """ @@ -263,7 +263,7 @@ def __get_spr(self) -> Optional[SqlParsingResult]: return None spr = create_lineage_sql_parsed_result( - query=self.view_context.datahub_transformed_sql(), + query=self.get_sql_query(), default_schema=self.view_context.view_connection.default_schema, default_db=self.view_context.view_connection.default_db, platform=self.view_context.view_connection.platform, @@ -390,6 +390,28 @@ def get_upstream_column_ref( def get_upstream_dataset_urn(self) -> List[Urn]: return self._get_upstream_dataset_urn() + @abstractmethod + def get_sql_query(self) -> str: + pass + + +class 
DirectQueryUpstreamSource(SqlBasedDerivedViewUpstream): + """ + Pattern 7 as per view-context documentation + """ + + def get_sql_query(self) -> str: + return self.view_context.datahub_transformed_sql_table_name() + + +class DerivedQueryUpstreamSource(SqlBasedDerivedViewUpstream): + """ + Pattern 4 as per view-context documentation + """ + + def get_sql_query(self) -> str: + return self.view_context.datahub_transformed_sql() + class NativeDerivedViewUpstream(AbstractViewUpstream): """ @@ -611,6 +633,7 @@ def create_view_upstream( ctx: PipelineContext, reporter: LookMLSourceReport, ) -> AbstractViewUpstream: + if view_context.is_regular_case(): return RegularViewUpstream( view_context=view_context, @@ -629,11 +652,23 @@ def create_view_upstream( looker_view_id_cache=looker_view_id_cache, ) - if ( - view_context.is_sql_based_derived_case() - or view_context.is_sql_based_derived_view_without_fields_case() + if any( + [ + view_context.is_sql_based_derived_case(), + view_context.is_sql_based_derived_view_without_fields_case(), + ] ): - return SqlBasedDerivedViewUpstream( + + return DerivedQueryUpstreamSource( + view_context=view_context, + config=config, + reporter=reporter, + ctx=ctx, + looker_view_id_cache=looker_view_id_cache, + ) + + if view_context.is_direct_sql_query_case(): + return DirectQueryUpstreamSource( view_context=view_context, config=config, reporter=reporter, @@ -651,9 +686,9 @@ def create_view_upstream( ) reporter.report_warning( - title="Implementation Not Found", + title="ViewUpstream Implementation Not Found", message="No implementation found to resolve upstream of the view", - context=view_context.view_file_name(), + context=f"view_name={view_context.name()} , view_file_name={view_context.view_file_name()}", ) return EmptyImplementation( diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index e0a72c71a1ef00..a2d841c3f8fdce 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -89,6 +89,7 @@ from datahub.metadata.urns import ChartUrn from datahub.sql_parsing.sqlglot_lineage import ColumnLineageInfo from datahub.utilities.dedup_list import deduplicate_list +from datahub.utilities.urns.urn_iter import lowercase_dataset_urn # Logger instance logger = logging.getLogger(__name__) @@ -127,7 +128,7 @@ def __init__( @staticmethod def urn_to_lowercase(value: str, flag: bool) -> str: if flag is True: - return value.lower() + return lowercase_dataset_urn(value) return value @@ -390,11 +391,13 @@ def to_datahub_dataset( for table in dataset.tables: # Create a URN for dataset - ds_urn = builder.make_dataset_urn_with_platform_instance( - platform=self.__config.platform_name, - name=self.assets_urn_to_lowercase(table.full_name), - platform_instance=self.__config.platform_instance, - env=self.__config.env, + ds_urn = self.assets_urn_to_lowercase( + builder.make_dataset_urn_with_platform_instance( + platform=self.__config.platform_name, + name=table.full_name, + platform_instance=self.__config.platform_instance, + env=self.__config.env, + ) ) logger.debug(f"dataset_urn={ds_urn}") diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift_schema.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift_schema.py index 6e88a50f898a5d..2e628269edbc37 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift_schema.py @@ -504,7 +504,11 @@ def get_alter_table_commands( yield AlterTableRow( transaction_id=row[field_names.index("transaction_id")], session_id=session_id, - query_text=row[field_names.index("query_text")], + # See https://docs.aws.amazon.com/redshift/latest/dg/r_STL_QUERYTEXT.html + # for why we need to replace the \n with a newline. 
+ query_text=row[field_names.index("query_text")].replace( + r"\n", "\n" + ), start_time=row[field_names.index("start_time")], ) rows = cursor.fetchmany() diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py index b8c7fd5aa88fc1..f81d06c35e3b09 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py @@ -8,32 +8,13 @@ from collections import OrderedDict from datetime import datetime from pathlib import PurePath -from typing import Any, Dict, Iterable, List, Optional, Tuple +from typing import Dict, Iterable, List, Optional, Tuple import smart_open.compression as so_compression from more_itertools import peekable from pyspark.conf import SparkConf from pyspark.sql import SparkSession from pyspark.sql.dataframe import DataFrame -from pyspark.sql.types import ( - ArrayType, - BinaryType, - BooleanType, - ByteType, - DateType, - DecimalType, - DoubleType, - FloatType, - IntegerType, - LongType, - MapType, - NullType, - ShortType, - StringType, - StructField, - StructType, - TimestampType, -) from pyspark.sql.utils import AnalysisException from smart_open import open as smart_open @@ -52,7 +33,7 @@ platform_name, support_status, ) -from datahub.ingestion.api.source import MetadataWorkUnitProcessor, SourceReport +from datahub.ingestion.api.source import MetadataWorkUnitProcessor from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.aws.s3_boto_utils import get_s3_tags, list_folders from datahub.ingestion.source.aws.s3_util import ( @@ -72,22 +53,13 @@ StatefulIngestionSourceBase, ) from datahub.metadata.com.linkedin.pegasus2avro.schema import ( - BooleanTypeClass, - BytesTypeClass, - DateTypeClass, - NullTypeClass, - NumberTypeClass, - RecordTypeClass, SchemaField, - SchemaFieldDataType, SchemaMetadata, StringTypeClass, - TimeTypeClass, ) from 
datahub.metadata.schema_classes import ( DataPlatformInstanceClass, DatasetPropertiesClass, - MapTypeClass, OperationClass, OperationTypeClass, OtherSchemaClass, @@ -101,55 +73,12 @@ logging.getLogger("py4j").setLevel(logging.ERROR) logger: logging.Logger = logging.getLogger(__name__) -# for a list of all types, see https://spark.apache.org/docs/3.0.3/api/python/_modules/pyspark/sql/types.html -_field_type_mapping = { - NullType: NullTypeClass, - StringType: StringTypeClass, - BinaryType: BytesTypeClass, - BooleanType: BooleanTypeClass, - DateType: DateTypeClass, - TimestampType: TimeTypeClass, - DecimalType: NumberTypeClass, - DoubleType: NumberTypeClass, - FloatType: NumberTypeClass, - ByteType: BytesTypeClass, - IntegerType: NumberTypeClass, - LongType: NumberTypeClass, - ShortType: NumberTypeClass, - ArrayType: NullTypeClass, - MapType: MapTypeClass, - StructField: RecordTypeClass, - StructType: RecordTypeClass, -} PAGE_SIZE = 1000 # Hack to support the .gzip extension with smart_open. 
so_compression.register_compressor(".gzip", so_compression._COMPRESSOR_REGISTRY[".gz"]) -def get_column_type( - report: SourceReport, dataset_name: str, column_type: str -) -> SchemaFieldDataType: - """ - Maps known Spark types to datahub types - """ - TypeClass: Any = None - - for field_type, type_class in _field_type_mapping.items(): - if isinstance(column_type, field_type): - TypeClass = type_class - break - - # if still not found, report the warning - if TypeClass is None: - report.report_warning( - dataset_name, f"unable to map type {column_type} to metadata schema" - ) - TypeClass = NullTypeClass - - return SchemaFieldDataType(type=TypeClass()) - - # config flags to emit telemetry for config_options_to_report = [ "platform", @@ -490,9 +419,7 @@ def add_partition_columns_to_schema( if not is_fieldpath_v2 else f"[version=2.0].[type=string].{partition_key}", nativeDataType="string", - type=SchemaFieldDataType(StringTypeClass()) - if not is_fieldpath_v2 - else SchemaFieldDataTypeClass(type=StringTypeClass()), + type=SchemaFieldDataTypeClass(StringTypeClass()), isPartitioningKey=True, nullable=True, recursive=False, diff --git a/metadata-ingestion/src/datahub/ingestion/source/salesforce.py b/metadata-ingestion/src/datahub/ingestion/source/salesforce.py index 42128123c61442..7a7f1f30950eb6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/salesforce.py +++ b/metadata-ingestion/src/datahub/ingestion/source/salesforce.py @@ -3,7 +3,7 @@ import time from datetime import datetime from enum import Enum -from typing import Dict, Iterable, List, Optional +from typing import Any, Dict, Iterable, List, Optional import requests from pydantic import Field, validator @@ -124,6 +124,9 @@ class SalesforceConfig(DatasetSourceConfigMixin): default=dict(), description='Regex patterns for tables/schemas to describe domain_key domain key (domain_key can be any string like "sales".) 
There can be multiple domain keys specified.', ) + api_version: Optional[str] = Field( + description="If specified, overrides default version used by the Salesforce package. Example value: '59.0'" + ) profiling: SalesforceProfilingConfig = SalesforceProfilingConfig() @@ -222,6 +225,12 @@ def __init__(self, config: SalesforceConfig, ctx: PipelineContext) -> None: self.session = requests.Session() self.platform: str = "salesforce" self.fieldCounts = {} + common_args: Dict[str, Any] = { + "domain": "test" if self.config.is_sandbox else None, + "session": self.session, + } + if self.config.api_version: + common_args["version"] = self.config.api_version try: if self.config.auth is SalesforceAuthType.DIRECT_ACCESS_TOKEN: @@ -236,8 +245,7 @@ def __init__(self, config: SalesforceConfig, ctx: PipelineContext) -> None: self.sf = Salesforce( instance_url=self.config.instance_url, session_id=self.config.access_token, - session=self.session, - domain="test" if self.config.is_sandbox else None, + **common_args, ) elif self.config.auth is SalesforceAuthType.USERNAME_PASSWORD: logger.debug("Username/Password Provided in Config") @@ -255,8 +263,7 @@ def __init__(self, config: SalesforceConfig, ctx: PipelineContext) -> None: username=self.config.username, password=self.config.password, security_token=self.config.security_token, - session=self.session, - domain="test" if self.config.is_sandbox else None, + **common_args, ) elif self.config.auth is SalesforceAuthType.JSON_WEB_TOKEN: @@ -275,14 +282,13 @@ def __init__(self, config: SalesforceConfig, ctx: PipelineContext) -> None: username=self.config.username, consumer_key=self.config.consumer_key, privatekey=self.config.private_key, - session=self.session, - domain="test" if self.config.is_sandbox else None, + **common_args, ) except Exception as e: logger.error(e) raise ConfigurationError("Salesforce login failed") from e - else: + if not self.config.api_version: # List all REST API versions and use latest one versions_url = 
"https://{instance}/services/data/".format( instance=self.sf.sf_instance, @@ -290,17 +296,22 @@ def __init__(self, config: SalesforceConfig, ctx: PipelineContext) -> None: versions_response = self.sf._call_salesforce("GET", versions_url).json() latest_version = versions_response[-1] version = latest_version["version"] + # we could avoid setting the version like below (after the Salesforce object has been already initiated + # above), since, according to the docs: + # https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/dome_versions.htm + # we don't need to be authenticated to list the versions (so we could perform this call before even + # authenticating) self.sf.sf_version = version - self.base_url = "https://{instance}/services/data/v{sf_version}/".format( - instance=self.sf.sf_instance, sf_version=version - ) + self.base_url = "https://{instance}/services/data/v{sf_version}/".format( + instance=self.sf.sf_instance, sf_version=self.sf.sf_version + ) - logger.debug( - "Using Salesforce REST API with {label} version: {version}".format( - label=latest_version["label"], version=latest_version["version"] - ) + logger.debug( + "Using Salesforce REST API version: {version}".format( + version=self.sf.sf_version ) + ) def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: sObjects = self.get_salesforce_objects() diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py index 1d4a5b377da147..a64589bcfed02a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py @@ -440,7 +440,7 @@ def _process_schema( yield from self._process_tag(tag) if not snowflake_schema.views and not snowflake_schema.tables: - self.structured_reporter.warning( + self.structured_reporter.info( title="No tables/views found in 
schema", message="If tables exist, please grant REFERENCES or SELECT permissions on them.", context=f"{db_name}.{schema_name}", diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py index a1878963d3798e..0177d59ef6b21a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py @@ -127,6 +127,8 @@ def is_dataset_pattern_allowed( SnowflakeObjectDomain.MATERIALIZED_VIEW, ): return False + if _is_sys_table(dataset_name): + return False if len(dataset_params) != 3: self.structured_reporter.info( @@ -176,6 +178,11 @@ def _combine_identifier_parts( return f"{db_name}.{schema_name}.{table_name}" +def _is_sys_table(table_name: str) -> bool: + # Often will look like `SYS$_UNPIVOT_VIEW1737` or `sys$_pivot_view19`. + return table_name.lower().startswith("sys$") + + # Qualified Object names from snowflake audit logs have quotes for for snowflake quoted identifiers, # For example "test-database"."test-schema".test_table # whereas we generate urns without quotes even for quoted identifiers for backward compatibility @@ -186,12 +193,13 @@ def _cleanup_qualified_name( ) -> str: name_parts = qualified_name.split(".") if len(name_parts) != 3: - structured_reporter.info( - title="Unexpected dataset pattern", - message="We failed to parse a Snowflake qualified name into its constituent parts. " - "DB/schema/table filtering may not work as expected on these entities.", - context=f"{qualified_name} has {len(name_parts)} parts", - ) + if not _is_sys_table(qualified_name): + structured_reporter.info( + title="Unexpected dataset pattern", + message="We failed to parse a Snowflake qualified name into its constituent parts. 
" + "DB/schema/table filtering may not work as expected on these entities.", + context=f"{qualified_name} has {len(name_parts)} parts", + ) return qualified_name.replace('"', "") return _combine_identifier_parts( db_name=name_parts[0].strip('"'), diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py index 398adc3708ef2b..5cc51882965a0c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py @@ -499,6 +499,7 @@ def get_schema_fields_for_column( self, dataset_name: str, column: Dict, + inspector: Inspector, pk_constraints: Optional[dict] = None, partition_keys: Optional[List[str]] = None, tags: Optional[List[str]] = None, @@ -506,6 +507,7 @@ def get_schema_fields_for_column( fields = get_schema_fields_for_sqlalchemy_column( column_name=column["name"], column_type=column["type"], + inspector=inspector, description=column.get("comment", None), nullable=column.get("nullable", True), is_part_of_key=( diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py b/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py index 65f8516fd340a3..59f301baf40165 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py @@ -169,12 +169,16 @@ def get_schema_fields_for_column( self, dataset_name: str, column: Dict[Any, Any], + inspector: Inspector, pk_constraints: Optional[Dict[Any, Any]] = None, partition_keys: Optional[List[str]] = None, tags: Optional[List[str]] = None, ) -> List[SchemaField]: fields = super().get_schema_fields_for_column( - dataset_name, column, pk_constraints + dataset_name, + column, + inspector, + pk_constraints, ) if self._COMPLEX_TYPE.match(fields[0].nativeDataType) and isinstance( diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py 
b/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py index 655d1ba68ed79e..9da6c294881247 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py @@ -521,7 +521,7 @@ def loop_tables( ) # add table schema fields - schema_fields = self.get_schema_fields(dataset_name, columns) + schema_fields = self.get_schema_fields(dataset_name, columns, inspector) self._set_partition_key(columns, schema_fields) @@ -754,7 +754,9 @@ def loop_views( # add view schema fields schema_fields = self.get_schema_fields( - dataset.dataset_name, dataset.columns + dataset.dataset_name, + dataset.columns, + inspector, ) schema_metadata = get_schema_metadata( @@ -877,6 +879,7 @@ def get_schema_fields_for_column( self, dataset_name: str, column: Dict[Any, Any], + inspector: Inspector, pk_constraints: Optional[Dict[Any, Any]] = None, partition_keys: Optional[List[str]] = None, tags: Optional[List[str]] = None, diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index de3012cc335681..2ab1e6bb41af1c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -120,6 +120,9 @@ from datahub.utilities.lossy_collections import LossyList from datahub.utilities.registries.domain_registry import DomainRegistry from datahub.utilities.sqlalchemy_query_combiner import SQLAlchemyQueryCombinerReport +from datahub.utilities.sqlalchemy_type_converter import ( + get_native_data_type_for_sqlalchemy_type, +) if TYPE_CHECKING: from datahub.ingestion.source.ge_data_profiler import ( @@ -260,8 +263,11 @@ def get_column_type( break if TypeClass is None: - sql_report.report_warning( - dataset_name, f"unable to map type {column_type!r} to metadata schema" + sql_report.info( + title="Unable to map column types to 
DataHub types", + message="Got an unexpected column type. The column's parsed field type will not be populated.", + context=f"{dataset_name} - {column_type!r}", + log=False, ) TypeClass = NullTypeClass @@ -807,6 +813,7 @@ def _process_table( schema_fields = self.get_schema_fields( dataset_name, columns, + inspector, pk_constraints, tags=extra_tags, partition_keys=partitions, @@ -987,6 +994,7 @@ def get_schema_fields( self, dataset_name: str, columns: List[dict], + inspector: Inspector, pk_constraints: Optional[dict] = None, partition_keys: Optional[List[str]] = None, tags: Optional[Dict[str, List[str]]] = None, @@ -999,6 +1007,7 @@ def get_schema_fields( fields = self.get_schema_fields_for_column( dataset_name, column, + inspector, pk_constraints, tags=column_tags, partition_keys=partition_keys, @@ -1010,6 +1019,7 @@ def get_schema_fields_for_column( self, dataset_name: str, column: dict, + inspector: Inspector, pk_constraints: Optional[dict] = None, partition_keys: Optional[List[str]] = None, tags: Optional[List[str]] = None, @@ -1019,10 +1029,16 @@ def get_schema_fields_for_column( tags_str = [make_tag_urn(t) for t in tags] tags_tac = [TagAssociationClass(t) for t in tags_str] gtc = GlobalTagsClass(tags_tac) + full_type = column.get("full_type") field = SchemaField( fieldPath=column["name"], type=get_column_type(self.report, dataset_name, column["type"]), - nativeDataType=column.get("full_type", repr(column["type"])), + nativeDataType=full_type + if full_type is not None + else get_native_data_type_for_sqlalchemy_type( + column["type"], + inspector=inspector, + ), description=column.get("comment", None), nullable=column["nullable"], recursive=False, @@ -1099,7 +1115,7 @@ def _process_view( self.warn(logger, dataset_name, "unable to get schema for this view") schema_metadata = None else: - schema_fields = self.get_schema_fields(dataset_name, columns) + schema_fields = self.get_schema_fields(dataset_name, columns, inspector) schema_metadata = get_schema_metadata( 
self.report, dataset_name, diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py index cc0a43bc5e8749..b6fa51dd70e18d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py @@ -387,12 +387,16 @@ def get_schema_fields_for_column( self, dataset_name: str, column: dict, + inspector: Inspector, pk_constraints: Optional[dict] = None, partition_keys: Optional[List[str]] = None, tags: Optional[List[str]] = None, ) -> List[SchemaField]: fields = super().get_schema_fields_for_column( - dataset_name, column, pk_constraints + dataset_name, + column, + inspector, + pk_constraints, ) if isinstance(column["type"], (datatype.ROW, sqltypes.ARRAY, datatype.MAP)): diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py b/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py index ae56fb87ee5281..a340f049731c46 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py @@ -469,7 +469,12 @@ def _process_projections( foreign_keys = self._get_foreign_keys( dataset_urn, inspector, schema, projection ) - schema_fields = self.get_schema_fields(dataset_name, columns, pk_constraints) + schema_fields = self.get_schema_fields( + dataset_name, + columns, + inspector, + pk_constraints, + ) schema_metadata = get_schema_metadata( self.report, dataset_name, @@ -673,7 +678,7 @@ def _process_models( ) dataset_snapshot.aspects.append(dataset_properties) - schema_fields = self.get_schema_fields(dataset_name, columns) + schema_fields = self.get_schema_fields(dataset_name, columns, inspector) schema_metadata = get_schema_metadata( self.report, diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/entity_removal_state.py b/metadata-ingestion/src/datahub/ingestion/source/state/entity_removal_state.py index 
0f031177c403a3..f011aa7bdd19e4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/state/entity_removal_state.py +++ b/metadata-ingestion/src/datahub/ingestion/source/state/entity_removal_state.py @@ -131,6 +131,9 @@ def get_percent_entities_changed( new_entities=self.urns, old_entities=old_checkpoint_state.urns ) + def urn_count(self) -> int: + return len(self.urns) + def compute_percent_entities_changed( new_entities: List[str], old_entities: List[str] diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py b/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py index 7ba06fe24155d1..c73472f1b8041e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py @@ -42,7 +42,7 @@ class StatefulStaleMetadataRemovalConfig(StatefulIngestionConfig): description="Soft-deletes the entities present in the last successful run but missing in the current run with stateful_ingestion enabled.", ) fail_safe_threshold: float = pydantic.Field( - default=40.0, + default=75.0, description="Prevents large amount of soft deletes & the state from committing from accidental changes to the source configuration if the relative change percent in entities compared to the previous state is above the 'fail_safe_threshold'.", le=100.0, ge=0.0, @@ -257,13 +257,33 @@ def gen_removed_entity_workunits(self) -> Iterable[MetadataWorkUnit]: assert self.stateful_ingestion_config - copy_previous_state_and_fail = False + copy_previous_state_and_exit = False + + # If the source already had a failure, skip soft-deletion. + # TODO: Eventually, switch this to check if anything in the pipeline had a failure so far, not just the source. 
+ if self.source.get_report().failures: + self.source.get_report().report_warning( + title="Skipping stateful ingestion / stale entity removal", + message="The soft-deletion of stale entities will be skipped because the source reported a failure.", + ) + copy_previous_state_and_exit = True + + if ( + not copy_previous_state_and_exit + and self.source.get_report().events_produced == 0 + ): + self.source.get_report().report_failure( + title="Skipping stateful ingestion / stale entity removal", + message="The source did not produce any metadata. Despite stateful ingestion being enabled, we will not delete any metadata. " + "This is a fail-safe mechanism to prevent the accidental deletion of all entities.", + ) + copy_previous_state_and_exit = True # Check if the entity delta is below the fail-safe threshold. entity_difference_percent = cur_checkpoint_state.get_percent_entities_changed( last_checkpoint_state ) - if ( + if not copy_previous_state_and_exit and ( entity_difference_percent > self.stateful_ingestion_config.fail_safe_threshold # Adding this check to protect against cases where get_percent_entities_changed returns over 100%. @@ -273,30 +293,21 @@ def gen_removed_entity_workunits(self) -> Iterable[MetadataWorkUnit]: ): # Log the failure. This would prevent the current state from getting committed. self.source.get_report().report_failure( - "stale-entity-removal", - f"Will not soft-delete entities, since we'd be deleting {entity_difference_percent:.1f}% of the existing entities. " - f"To force a deletion, increase the value of 'stateful_ingestion.fail_safe_threshold' (currently {self.stateful_ingestion_config.fail_safe_threshold})", - ) - copy_previous_state_and_fail = True - - if self.source.get_report().events_produced == 0: - self.source.get_report().report_failure( - "stale-entity-removal", - "Skipping stale entity soft-deletion because the source produced no events. 
" - "This is a fail-safe mechanism to prevent accidental deletion of all entities.", - ) - copy_previous_state_and_fail = True - - # If the source already had a failure, skip soft-deletion. - # TODO: Eventually, switch this to check if anything in the pipeline had a failure so far, not just the source. - if self.source.get_report().failures: - self.source.get_report().report_warning( - "stale-entity-removal", - "Skipping stale entity soft-deletion and copying urns from last state since source already had failures.", + title="Skipping stateful ingestion / stale entity removal", + message=f"\ +The previous run produced {last_checkpoint_state.urn_count()} entities, whereas this run produced {cur_checkpoint_state.urn_count()} entities. \ +Comparing the entities produced this run vs the previous run, we would be deleting {entity_difference_percent:.1f}% of the entities produced by the previous run. \ +This percentage is above the threshold (currently {self.stateful_ingestion_config.fail_safe_threshold}), so we will skip soft-deleting stale entities.\ +\ +To update this threshold, add this to your recipe: \ +\ +stateful_ingestion:\ + fail_safe_threshold: \ +", ) - copy_previous_state_and_fail = True + copy_previous_state_and_exit = True - if copy_previous_state_and_fail: + if copy_previous_state_and_exit: logger.info( f"Copying urns from last state (size {len(last_checkpoint_state.urns)}) to current state (size {len(cur_checkpoint_state.urns)}) " "to ensure stale entities from previous runs are deleted on the next successful run." 
diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py index 9cde3b1f8d3a07..510cb6c96d1f28 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py @@ -757,6 +757,12 @@ def _re_authenticate(self): ] = self.config.get_tableau_auth(self.site.content_url) self.server.auth.sign_in(tableau_auth) + @property + def site_content_url(self) -> Optional[str]: + if self.site and self.site.content_url: + return self.site.content_url + return None + def _populate_usage_stat_registry(self) -> None: if self.server is None: return @@ -2524,7 +2530,9 @@ def emit_sheets_as_charts( last_modified = self.get_last_modified(creator, created_at, updated_at) if sheet.get(c.PATH): - site_part = f"/site/{self.site.content_url}" if self.site else "" + site_part = ( + f"/site/{self.site_content_url}" if self.site_content_url else "" + ) sheet_external_url = ( f"{self.config.connect_uri}/#{site_part}/views/{sheet.get(c.PATH)}" ) @@ -2535,7 +2543,7 @@ def emit_sheets_as_charts( and sheet[c.CONTAINED_IN_DASHBOARDS][0].get(c.PATH) ): # sheet contained in dashboard - site_part = f"/t/{self.site.content_url}" if self.site else "" + site_part = f"/t/{self.site_content_url}" if self.site_content_url else "" dashboard_path = sheet[c.CONTAINED_IN_DASHBOARDS][0][c.PATH] sheet_external_url = f"{self.config.connect_uri}{site_part}/authoring/{dashboard_path}/{quote(sheet.get(c.NAME, ''), safe='')}" else: @@ -2667,7 +2675,7 @@ def emit_workbook_as_container(self, workbook: Dict) -> Iterable[MetadataWorkUni else None ) - site_part = f"/site/{self.site.content_url}" if self.site else "" + site_part = f"/site/{self.site_content_url}" if self.site_content_url else "" workbook_uri = workbook.get("uri") workbook_part = ( workbook_uri[workbook_uri.index("/workbooks/") :] if workbook_uri else None @@ -2826,7 +2834,7 @@ def emit_dashboard( updated_at = 
dashboard.get(c.UPDATED_AT, datetime.now()) last_modified = self.get_last_modified(creator, created_at, updated_at) - site_part = f"/site/{self.site.content_url}" if self.site else "" + site_part = f"/site/{self.site_content_url}" if self.site_content_url else "" dashboard_external_url = ( f"{self.config.connect_uri}/#{site_part}/views/{dashboard.get(c.PATH, '')}" ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py index 93f2a0ef2f6a86..f3a9c4a5aa201e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py @@ -502,7 +502,12 @@ def get_tags_from_params(params: List[str] = []) -> GlobalTagsClass: def tableau_field_to_schema_field(field, ingest_tags): - nativeDataType = field.get("dataType", "UNKNOWN") + # The check here makes sure that even if 'dataType' key exists in the 'field' dictionary but has value None, + # it will be set as "UNKNOWN" (nativeDataType field can not be None in the SchemaField). 
+ # Hence, field.get("dataType", "UNKNOWN") is not enough + nativeDataType = field.get("dataType") + if nativeDataType is None: + nativeDataType = "UNKNOWN" TypeClass = FIELD_TYPE_MAPPING.get(nativeDataType, NullTypeClass) schema_field = SchemaField( diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_profiling.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_profiling.py index 5d6d2bec6d2fcb..51546a79e05c32 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_profiling.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_profiling.py @@ -1,15 +1,14 @@ import logging import time -from typing import Optional, Union +from typing import Optional from databricks.sdk import WorkspaceClient from databricks.sdk.core import DatabricksError from databricks.sdk.service._internal import Wait from databricks.sdk.service.catalog import TableInfo from databricks.sdk.service.sql import ( - ExecuteStatementResponse, - GetStatementResponse, GetWarehouseResponse, + StatementResponse, StatementState, StatementStatus, ) @@ -125,7 +124,7 @@ def _should_retry_unsupported_column( def _analyze_table( self, ref: TableReference, include_columns: bool - ) -> ExecuteStatementResponse: + ) -> StatementResponse: statement = f"ANALYZE TABLE {ref.schema}.{ref.table} COMPUTE STATISTICS" if include_columns: statement += " FOR ALL COLUMNS" @@ -139,7 +138,7 @@ def _analyze_table( return response def _check_analyze_table_statement_status( - self, execute_response: ExecuteStatementResponse, max_wait_secs: int + self, execute_response: StatementResponse, max_wait_secs: int ) -> bool: if not execute_response.statement_id or not execute_response.status: return False @@ -230,9 +229,7 @@ def _get_int(self, table_info: TableInfo, field: str) -> Optional[int]: return None @staticmethod - def _raise_if_error( - response: Union[ExecuteStatementResponse, GetStatementResponse], key: str - ) -> None: + def _raise_if_error(response: 
StatementResponse, key: str) -> None: if response.status and response.status.state in [ StatementState.FAILED, StatementState.CANCELED, diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_dataproduct.py b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_dataproduct.py index 45e92628430258..c474e423030e05 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_dataproduct.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_dataproduct.py @@ -11,7 +11,7 @@ from datahub.ingestion.transformer.dataset_transformer import ( DatasetDataproductTransformer, ) -from datahub.metadata.schema_classes import MetadataChangeProposalClass +from datahub.metadata.schema_classes import ContainerClass, MetadataChangeProposalClass from datahub.specific.dataproduct import DataProductPatchBuilder logger = logging.getLogger(__name__) @@ -23,6 +23,8 @@ class AddDatasetDataProductConfig(ConfigModel): _resolve_data_product_fn = pydantic_resolve_key("get_data_product_to_add") + is_container: bool = False + class AddDatasetDataProduct(DatasetDataproductTransformer): """Transformer that adds dataproduct entity for provided dataset as its asset according to a callback function.""" @@ -49,10 +51,11 @@ def handle_end_of_stream( self, ) -> List[Union[MetadataChangeProposalWrapper, MetadataChangeProposalClass]]: data_products: Dict[str, DataProductPatchBuilder] = {} - + data_products_container: Dict[str, DataProductPatchBuilder] = {} logger.debug("Generating dataproducts") for entity_urn in self.entity_map.keys(): data_product_urn = self.config.get_data_product_to_add(entity_urn) + is_container = self.config.is_container if data_product_urn: if data_product_urn not in data_products: data_products[data_product_urn] = DataProductPatchBuilder( @@ -63,11 +66,34 @@ def handle_end_of_stream( data_product_urn ].add_asset(entity_urn) + if is_container: + assert self.ctx.graph + container_aspect = self.ctx.graph.get_aspect( + 
entity_urn, aspect_type=ContainerClass + ) + if not container_aspect: + continue + container_urn = container_aspect.container + if data_product_urn not in data_products_container: + container_product = DataProductPatchBuilder( + data_product_urn + ).add_asset(container_urn) + data_products_container[data_product_urn] = container_product + else: + data_products_container[ + data_product_urn + ] = data_products_container[data_product_urn].add_asset( + container_urn + ) + mcps: List[ Union[MetadataChangeProposalWrapper, MetadataChangeProposalClass] ] = [] for data_product in data_products.values(): mcps.extend(list(data_product.build())) + if is_container: + for data_product in data_products_container.values(): + mcps.extend(list(data_product.build())) return mcps @@ -97,6 +123,7 @@ def create( class PatternDatasetDataProductConfig(ConfigModel): dataset_to_data_product_urns_pattern: KeyValuePattern = KeyValuePattern.all() + is_container: bool = False @pydantic.root_validator(pre=True) def validate_pattern_value(cls, values: Dict) -> Dict: @@ -122,6 +149,7 @@ def __init__(self, config: PatternDatasetDataProductConfig, ctx: PipelineContext )[0] if dataset_to_data_product.value(dataset_urn) else None, + is_container=config.is_container, ) super().__init__(generic_config, ctx) diff --git a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py index 976ff8bcc9b3ff..0146343002171b 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py @@ -189,35 +189,49 @@ def _table_level_lineage( statement: sqlglot.Expression, dialect: sqlglot.Dialect ) -> Tuple[Set[_TableName], Set[_TableName]]: # Generate table-level lineage. 
- modified = { - _TableName.from_sqlglot_table(expr.this) - for expr in statement.find_all( - sqlglot.exp.Create, - sqlglot.exp.Insert, - sqlglot.exp.Update, - sqlglot.exp.Delete, - sqlglot.exp.Merge, - ) - # In some cases like "MERGE ... then INSERT (col1, col2) VALUES (col1, col2)", - # the `this` on the INSERT part isn't a table. - if isinstance(expr.this, sqlglot.exp.Table) - } | { - # For statements that include a column list, like - # CREATE DDL statements and `INSERT INTO table (col1, col2) SELECT ...` - # the table name is nested inside a Schema object. - _TableName.from_sqlglot_table(expr.this.this) - for expr in statement.find_all( - sqlglot.exp.Create, - sqlglot.exp.Insert, - ) - if isinstance(expr.this, sqlglot.exp.Schema) - and isinstance(expr.this.this, sqlglot.exp.Table) - } + modified = ( + { + _TableName.from_sqlglot_table(expr.this) + for expr in statement.find_all( + sqlglot.exp.Create, + sqlglot.exp.Insert, + sqlglot.exp.Update, + sqlglot.exp.Delete, + sqlglot.exp.Merge, + sqlglot.exp.AlterTable, + ) + # In some cases like "MERGE ... then INSERT (col1, col2) VALUES (col1, col2)", + # the `this` on the INSERT part isn't a table. + if isinstance(expr.this, sqlglot.exp.Table) + } + | { + # For statements that include a column list, like + # CREATE DDL statements and `INSERT INTO table (col1, col2) SELECT ...` + # the table name is nested inside a Schema object. + _TableName.from_sqlglot_table(expr.this.this) + for expr in statement.find_all( + sqlglot.exp.Create, + sqlglot.exp.Insert, + ) + if isinstance(expr.this, sqlglot.exp.Schema) + and isinstance(expr.this.this, sqlglot.exp.Table) + } + | { + # For drop statements, we only want it if a table/view is being dropped. + # Other "kinds" will not have table.name populated. 
+ _TableName.from_sqlglot_table(expr.this) + for expr in ([statement] if isinstance(statement, sqlglot.exp.Drop) else []) + if isinstance(expr.this, sqlglot.exp.Table) + and expr.this.this + and expr.this.name + } + ) tables = ( { _TableName.from_sqlglot_table(table) for table in statement.find_all(sqlglot.exp.Table) + if not isinstance(table.parent, sqlglot.exp.Drop) } # ignore references created in this query - modified diff --git a/metadata-ingestion/src/datahub/telemetry/telemetry.py b/metadata-ingestion/src/datahub/telemetry/telemetry.py index 69a790b3d9bc76..4faf04ee2d2c76 100644 --- a/metadata-ingestion/src/datahub/telemetry/telemetry.py +++ b/metadata-ingestion/src/datahub/telemetry/telemetry.py @@ -283,7 +283,7 @@ def init_tracking(self) -> None: if not self.enabled or self.mp is None or self.tracking_init is True: return - logger.debug("Sending init Telemetry") + logger.debug("Sending init telemetry") try: self.mp.people_set( self.client_id, @@ -310,13 +310,21 @@ def ping( if not self.enabled or self.mp is None: return + properties = properties or {} + # send event try: - logger.debug(f"Sending telemetry for {event_name}") + if event_name == "function-call": + logger.debug( + f"Sending telemetry for {event_name} {properties.get('function')}, status {properties.get('status')}" + ) + else: + logger.debug(f"Sending telemetry for {event_name}") + properties = { **_default_telemetry_properties(), **self._server_props(server), - **(properties or {}), + **properties, } self.mp.track(self.client_id, event_name, properties) except Exception as e: diff --git a/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py b/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py index 39c0dddd314002..72b5f6c5e26e4b 100644 --- a/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py +++ b/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py @@ -15,7 +15,6 @@ logger = logging.getLogger(__name__) -# TODO: Hook this into the 
standard --update-golden-files mechanism. UPDATE_FILES = os.environ.get("UPDATE_SQLPARSER_FILES", "false").lower() == "true" diff --git a/metadata-ingestion/src/datahub/utilities/logging_manager.py b/metadata-ingestion/src/datahub/utilities/logging_manager.py index 64383745eb2d1b..1eb763394094ac 100644 --- a/metadata-ingestion/src/datahub/utilities/logging_manager.py +++ b/metadata-ingestion/src/datahub/utilities/logging_manager.py @@ -35,6 +35,8 @@ "acryl_datahub_cloud", ] IN_MEMORY_LOG_BUFFER_SIZE = 2000 # lines +IN_MEMORY_LOG_BUFFER_MAX_LINE_LENGTH = 2000 # characters + NO_COLOR = os.environ.get("NO_COLOR", False) @@ -159,6 +161,9 @@ def __init__(self, maxlen: Optional[int] = None) -> None: self._buffer: Deque[str] = collections.deque(maxlen=maxlen) def write(self, line: str) -> None: + if len(line) > IN_MEMORY_LOG_BUFFER_MAX_LINE_LENGTH: + line = line[:IN_MEMORY_LOG_BUFFER_MAX_LINE_LENGTH] + "[truncated]" + self._buffer.append(line) def clear(self) -> None: diff --git a/metadata-ingestion/src/datahub/utilities/sqlalchemy_type_converter.py b/metadata-ingestion/src/datahub/utilities/sqlalchemy_type_converter.py index 41d02646fdb8a0..ad94c6904e2807 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlalchemy_type_converter.py +++ b/metadata-ingestion/src/datahub/utilities/sqlalchemy_type_converter.py @@ -5,6 +5,8 @@ from typing import Any, Dict, List, Optional, Type, Union from sqlalchemy import types +from sqlalchemy.engine.reflection import Inspector +from sqlalchemy.sql.visitors import Visitable from datahub.ingestion.extractor.schema_util import avro_schema_to_mce_fields from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField @@ -176,6 +178,7 @@ def get_avro_for_sqlalchemy_column( def get_schema_fields_for_sqlalchemy_column( column_name: str, column_type: types.TypeEngine, + inspector: Inspector, description: Optional[str] = None, nullable: Optional[bool] = True, is_part_of_key: Optional[bool] = False, @@ -216,7 +219,10 @@ def 
get_schema_fields_for_sqlalchemy_column( SchemaField( fieldPath=column_name, type=SchemaFieldDataTypeClass(type=NullTypeClass()), - nativeDataType=str(column_type), + nativeDataType=get_native_data_type_for_sqlalchemy_type( + column_type, + inspector, + ), ) ] @@ -240,3 +246,25 @@ def get_schema_fields_for_sqlalchemy_column( ) return schema_fields + + +def get_native_data_type_for_sqlalchemy_type( + column_type: types.TypeEngine, inspector: Inspector +) -> str: + if isinstance(column_type, types.NullType): + return column_type.__visit_name__ + + try: + return column_type.compile(dialect=inspector.dialect) + except Exception as e: + logger.debug( + f"Unable to compile sqlalchemy type {column_type} the error was: {e}" + ) + + if ( + isinstance(column_type, Visitable) + and column_type.__visit_name__ is not None + ): + return column_type.__visit_name__ + + return repr(column_type) diff --git a/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py b/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py index 5bef17119675e0..f0e4c6f5ee14a1 100644 --- a/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py +++ b/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py @@ -131,7 +131,7 @@ def _modify_at_path( _modify_at_path(getattr(model, path[0]), path[1:], new_value) -def _lowercase_dataset_urn(dataset_urn: str) -> str: +def lowercase_dataset_urn(dataset_urn: str) -> str: cur_urn = DatasetUrn.from_string(dataset_urn) new_urn = DatasetUrn( platform=cur_urn.platform, name=cur_urn.name.lower(), env=cur_urn.env @@ -149,10 +149,10 @@ def lowercase_dataset_urns( ) -> None: def modify_urn(urn: str) -> str: if guess_entity_type(urn) == "dataset": - return _lowercase_dataset_urn(urn) + return lowercase_dataset_urn(urn) elif guess_entity_type(urn) == "schemaField": cur_urn = Urn.from_string(urn) - cur_urn._entity_ids[0] = _lowercase_dataset_urn(cur_urn._entity_ids[0]) + cur_urn._entity_ids[0] = lowercase_dataset_urn(cur_urn._entity_ids[0]) return str(cur_urn) return 
urn diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json index 81754fd6cbcaca..d2c71659706818 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json @@ -638,8 +638,8 @@ "json": { "timestampMillis": 1663355198240, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED" } @@ -659,8 +659,8 @@ "json": { "timestampMillis": 1663355198242, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -1097,8 +1097,8 @@ "json": { "timestampMillis": 1663355198240, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED" } @@ -1118,8 +1118,8 @@ "json": { "timestampMillis": 1663355198242, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -1420,8 +1420,8 @@ "json": { "timestampMillis": 1663355198240, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED" } @@ -1441,8 +1441,8 @@ "json": { "timestampMillis": 1663355198242, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -1944,8 +1944,8 @@ "json": { "timestampMillis": 1663355198240, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + 
"partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED" } @@ -1965,8 +1965,8 @@ "json": { "timestampMillis": 1663355198242, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -1982,6 +1982,2201 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.actor,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.actor,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.actor,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "model_maturity": "in dev", + "owner": "@alice", + "some_other_property": "test 1", + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.actor", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "actor", + 
"description": "description for actor table from dbt", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:@alice", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.actor", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1581759273000, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "actor_id", + "nullable": false, + "description": "description for actor_id column from dbt", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "first_name", + "nullable": false, + "description": "dbt comment: Actors column \u2013 from postgres\n\ndbt model description: description for first_name from dbt", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_name", + "nullable": false, + "description": "description for last_name from dbt", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_update", + "nullable": false, + "description": "description for last_update from dbt", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + 
"recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.actor,PROD)", + "type": "COPY" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.address", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": 
"1.7.3" + }, + "name": "address", + "description": "a user's address", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.address", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1581759930000, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "address", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "address2", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "address_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "city_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "district", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_update", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "phone", + "nullable": false, + 
"type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "postal_code", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.address,PROD)", + "type": "COPY" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.category,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.category,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.category,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + 
"language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.category", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "category", + "description": "a user's category", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.category", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1581759987000, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "category_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_update", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.category,PROD)", + "type": "COPY" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 
1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.city", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "city", + "description": "", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.city", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + 
"time": 1581759925000, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "city", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "city_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "country_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_update", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.city,PROD)", + "type": "COPY" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.country,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.country,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.country,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "model_maturity": "in prod", + "owner": "@bob", + "some_other_property": "test 2", + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.country", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "country", + "description": "", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:@bob", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.country", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1581759840000, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + 
"fields": [ + { + "fieldPath": "country", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "country_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_update", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.country,PROD)", + "type": "COPY" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + 
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.customer", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "customer", + "description": "description for customer table from dbt", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.customer", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1581760640000, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "active", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "activebool", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.BooleanType": {} + } + }, + "nativeDataType": "boolean", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "address_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + 
{ + "fieldPath": "create_date", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "date", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "email", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "first_name", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_name", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_update", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "store_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD)", + "type": "COPY" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + 
"entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.payment_p2020_01", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "payment_p2020_01", + "description": "", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.payment_p2020_01", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1580505371997, + "actor": 
"urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "amount", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "numeric(5,2)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_date", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "rental_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "staff_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_01,PROD)", + "type": "COPY" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "an_array_property": "['alpha', 'beta', 'charlie']", + "model_maturity": "in prod", + "owner": "@charles", + "some_other_property": "test 3", + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.payment_p2020_02", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "payment_p2020_02", + "description": "", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:@charles", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, 
+ "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.payment_p2020_02", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1582319845997, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "amount", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "numeric(5,2)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_date", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "rental_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "staff_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + 
"actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_02,PROD)", + "type": "COPY" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.payment_p2020_03", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "payment_p2020_03", + "description": "", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + 
"removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.payment_p2020_03", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1584998318997, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "amount", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "numeric(5,2)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_date", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "rental_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "staff_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": 
"urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_03,PROD)", + "type": "COPY" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.payment_p2020_04", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "payment_p2020_04", + "description": "", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": 
false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.payment_p2020_04", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1588287228997, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "amount", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "numeric(5,2)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_date", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "rental_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "staff_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + 
"dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_04,PROD)", + "type": "COPY" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.payment_p2020_05", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "payment_p2020_05", + "description": "a payment", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + 
"com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.payment_p2020_05", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1589460269997, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "amount", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "numeric(5,2)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_date", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "rental_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "staff_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_05,PROD)", + "type": "COPY" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.payment_p2020_06", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "payment_p2020_06", + "description": "", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + 
"com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.payment_p2020_06", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": -62135596800000, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "amount", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "numeric(5,2)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_date", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "rental_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "staff_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_06,PROD)", + "type": "COPY" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an-aliased-view-for-monthly-billing,PROD)", @@ -2344,8 +4539,8 @@ }, "assertionUrn": "urn:li:assertion:ba2c6ba830d407d539452f4cf46c92a6", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2426,8 +4621,8 @@ }, "assertionUrn": "urn:li:assertion:10f2a119dedcaab43afc47ff13d9cb5b", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2507,8 +4702,8 @@ }, "assertionUrn": "urn:li:assertion:c456eccf6440c6e3388c584689a74d91", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2588,8 +4783,8 @@ }, "assertionUrn": "urn:li:assertion:f812b73477d81e6af283d918cb59e7bf", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2678,8 +4873,8 @@ }, "assertionUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2752,8 +4947,8 @@ }, "assertionUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2833,8 +5028,8 @@ }, "assertionUrn": "urn:li:assertion:f6a1fde3ab4919abcc04bdee93144958", "partitionSpec": { - "type": 
"FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2920,8 +5115,8 @@ }, "assertionUrn": "urn:li:assertion:60ce4aad7ff6dbff7004da0f2258c9df", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, diff --git a/metadata-ingestion/tests/integration/dbt/test_dbt.py b/metadata-ingestion/tests/integration/dbt/test_dbt.py index a46da9707679c7..d213cffa78045e 100644 --- a/metadata-ingestion/tests/integration/dbt/test_dbt.py +++ b/metadata-ingestion/tests/integration/dbt/test_dbt.py @@ -227,7 +227,7 @@ def set_paths( source_config_modifiers={ "prefer_sql_parser_lineage": True, "skip_sources_in_lineage": True, - "entities_enabled": {"sources": "NO"}, + # "entities_enabled": {"sources": "NO"}, }, ), ], diff --git a/metadata-ingestion/tests/integration/hana/docker-compose.yml b/metadata-ingestion/tests/integration/hana/docker-compose.yml index 38bd1f544a0955..3f742362284835 100644 --- a/metadata-ingestion/tests/integration/hana/docker-compose.yml +++ b/metadata-ingestion/tests/integration/hana/docker-compose.yml @@ -2,15 +2,11 @@ version: '3.4' services: testhana: - image: "store/saplabs/hanaexpress:2.00.054.00.20210603.1" + image: "saplabs/hanaexpress:latest" container_name: "testhana" restart: "unless-stopped" ports: - - 39013:39013 - - 39017:39017 - - 39041-39045:39041-39045 - - 1128-1129:1128-1129 - - 59013-59014:59013-59014 + - 39041:39041 volumes: - ./post_start:/hana/hooks/post_start/ - ./setup:/hana/mounts/setup/ diff --git a/metadata-ingestion/tests/integration/hana/hana_mces_golden.json b/metadata-ingestion/tests/integration/hana/hana_mces_golden.json index 84ad1f3d3e592c..26789abc355c75 100644 --- a/metadata-ingestion/tests/integration/hana/hana_mces_golden.json +++ b/metadata-ingestion/tests/integration/hana/hana_mces_golden.json @@ -1,158 +1,217 @@ [ { - "auditHeader": null, "entityType": "container", 
"entityUrn": "urn:li:container:c8107a53ee221a15de176e4d34a06940", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { - "value": "{\"customProperties\": {\"platform\": \"hana\", \"instance\": \"PROD\", \"database\": \"hxe\"}, \"name\": \"hxe\"}", - "contentType": "application/json" + "json": { + "customProperties": { + "platform": "hana", + "env": "PROD", + "database": "hxe" + }, + "name": "hxe" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c8107a53ee221a15de176e4d34a06940", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1586847600000, "runId": "hana-test", - "registryName": null, - "registryVersion": null, - "properties": null + "lastRunId": "no-run-id-provided" } }, { - "auditHeader": null, "entityType": "container", "entityUrn": "urn:li:container:c8107a53ee221a15de176e4d34a06940", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:hana\"}", - "contentType": "application/json" + "json": { + "platform": "urn:li:dataPlatform:hana" + } }, "systemMetadata": { "lastObserved": 1586847600000, "runId": "hana-test", - "registryName": null, - "registryVersion": null, - "properties": null + "lastRunId": "no-run-id-provided" } }, { - "auditHeader": null, "entityType": "container", "entityUrn": "urn:li:container:c8107a53ee221a15de176e4d34a06940", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { - "value": "{\"typeNames\": [\"Database\"]}", - "contentType": "application/json" + "json": { + "typeNames": [ + "Database" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ 
+ "entityType": "container", + "entityUrn": "urn:li:container:c8107a53ee221a15de176e4d34a06940", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } }, "systemMetadata": { "lastObserved": 1586847600000, "runId": "hana-test", - "registryName": null, - "registryVersion": null, - "properties": null + "lastRunId": "no-run-id-provided" } }, { - "auditHeader": null, "entityType": "container", "entityUrn": "urn:li:container:f870c782e0a44727bd10da2ab742363b", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { - "value": "{\"customProperties\": {\"platform\": \"hana\", \"instance\": \"PROD\", \"database\": \"hxe\", \"schema\": \"hotel\"}, \"name\": \"hotel\"}", - "contentType": "application/json" + "json": { + "customProperties": { + "platform": "hana", + "env": "PROD", + "database": "hxe", + "schema": "hotel" + }, + "name": "hotel" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f870c782e0a44727bd10da2ab742363b", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1586847600000, "runId": "hana-test", - "registryName": null, - "registryVersion": null, - "properties": null + "lastRunId": "no-run-id-provided" } }, { - "auditHeader": null, "entityType": "container", "entityUrn": "urn:li:container:f870c782e0a44727bd10da2ab742363b", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:hana\"}", - "contentType": "application/json" + "json": { + "platform": "urn:li:dataPlatform:hana" + } }, "systemMetadata": { "lastObserved": 1586847600000, "runId": "hana-test", - "registryName": null, - "registryVersion": null, - "properties": null + "lastRunId": 
"no-run-id-provided" } }, { - "auditHeader": null, "entityType": "container", "entityUrn": "urn:li:container:f870c782e0a44727bd10da2ab742363b", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { - "value": "{\"typeNames\": [\"Schema\"]}", - "contentType": "application/json" + "json": { + "typeNames": [ + "Schema" + ] + } }, "systemMetadata": { "lastObserved": 1586847600000, "runId": "hana-test", - "registryName": null, - "registryVersion": null, - "properties": null + "lastRunId": "no-run-id-provided" } }, { - "auditHeader": null, "entityType": "container", "entityUrn": "urn:li:container:f870c782e0a44727bd10da2ab742363b", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:c8107a53ee221a15de176e4d34a06940\"}", - "contentType": "application/json" + "json": { + "container": "urn:li:container:c8107a53ee221a15de176e4d34a06940" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f870c782e0a44727bd10da2ab742363b", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:c8107a53ee221a15de176e4d34a06940", + "urn": "urn:li:container:c8107a53ee221a15de176e4d34a06940" + } + ] + } }, "systemMetadata": { "lastObserved": 1586847600000, "runId": "hana-test", - "registryName": null, - "registryVersion": null, - "properties": null + "lastRunId": "no-run-id-provided" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hana,hotel.customer,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:f870c782e0a44727bd10da2ab742363b\"}", - "contentType": "application/json" + "json": { + "container": 
"urn:li:container:f870c782e0a44727bd10da2ab742363b" + } }, "systemMetadata": { "lastObserved": 1586847600000, "runId": "hana-test", - "registryName": null, - "registryVersion": null, - "properties": null + "lastRunId": "no-run-id-provided" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:hana,hotel.customer,PROD)", @@ -165,11 +224,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": {}, - "externalUrl": null, "name": "customer", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } }, @@ -180,17 +235,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.MySqlDDL": { @@ -200,189 +250,166 @@ "fields": [ { "fieldPath": "cno", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": true, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": true }, { "fieldPath": "title", - "jsonPath": null, "nullable": true, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=7)", + "nativeDataType": "VARCHAR(7)", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "firstname", - "jsonPath": null, "nullable": true, - "description": null, 
"type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=20)", + "nativeDataType": "VARCHAR(20)", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "name", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=40)", + "nativeDataType": "VARCHAR(40)", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "address", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=40)", + "nativeDataType": "VARCHAR(40)", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "zip", - "jsonPath": null, "nullable": true, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=6)", + "nativeDataType": "VARCHAR(6)", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null + ] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, "runId": "hana-test", - "registryName": null, - "registryVersion": null, - "properties": null + "lastRunId": "no-run-id-provided" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hana,hotel.customer,PROD)", - "entityKeyAspect": null, "changeType": 
"UPSERT", "aspectName": "subTypes", "aspect": { - "value": "{\"typeNames\": [\"Table\"]}", - "contentType": "application/json" + "json": { + "typeNames": [ + "Table" + ] + } }, "systemMetadata": { "lastObserved": 1586847600000, "runId": "hana-test", - "registryName": null, - "registryVersion": null, - "properties": null + "lastRunId": "no-run-id-provided" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hana,hotel.customer,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "domains", "aspect": { - "value": "{\"domains\": [\"urn:li:domain:sales\"]}", - "contentType": "application/json" + "json": { + "domains": [ + "urn:li:domain:sales" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hana,hotel.customer,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:c8107a53ee221a15de176e4d34a06940", + "urn": "urn:li:container:c8107a53ee221a15de176e4d34a06940" + }, + { + "id": "urn:li:container:f870c782e0a44727bd10da2ab742363b", + "urn": "urn:li:container:f870c782e0a44727bd10da2ab742363b" + } + ] + } }, "systemMetadata": { "lastObserved": 1586847600000, "runId": "hana-test", - "registryName": null, - "registryVersion": null, - "properties": null + "lastRunId": "no-run-id-provided" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hana,hotel.hotel,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:f870c782e0a44727bd10da2ab742363b\"}", - "contentType": "application/json" + "json": { + "container": "urn:li:container:f870c782e0a44727bd10da2ab742363b" + } }, "systemMetadata": { "lastObserved": 1586847600000, 
"runId": "hana-test", - "registryName": null, - "registryVersion": null, - "properties": null + "lastRunId": "no-run-id-provided" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:hana,hotel.hotel,PROD)", @@ -395,11 +422,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": {}, - "externalUrl": null, "name": "hotel", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } }, @@ -410,17 +433,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.MySqlDDL": { @@ -430,189 +448,166 @@ "fields": [ { "fieldPath": "hno", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": true, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": true }, { "fieldPath": "name", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "address", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=40)", + 
"nativeDataType": "VARCHAR(40)", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "city", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=30)", + "nativeDataType": "VARCHAR(30)", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "state", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=2)", + "nativeDataType": "VARCHAR(2)", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "zip", - "jsonPath": null, "nullable": true, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=6)", + "nativeDataType": "VARCHAR(6)", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null + ] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, "runId": "hana-test", - "registryName": null, - "registryVersion": null, - "properties": null + "lastRunId": "no-run-id-provided" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hana,hotel.hotel,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { - "value": "{\"typeNames\": [\"Table\"]}", - "contentType": "application/json" 
+ "json": { + "typeNames": [ + "Table" + ] + } }, "systemMetadata": { "lastObserved": 1586847600000, "runId": "hana-test", - "registryName": null, - "registryVersion": null, - "properties": null + "lastRunId": "no-run-id-provided" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hana,hotel.hotel,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "domains", "aspect": { - "value": "{\"domains\": [\"urn:li:domain:sales\"]}", - "contentType": "application/json" + "json": { + "domains": [ + "urn:li:domain:sales" + ] + } }, "systemMetadata": { "lastObserved": 1586847600000, "runId": "hana-test", - "registryName": null, - "registryVersion": null, - "properties": null + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hana,hotel.hotel,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:c8107a53ee221a15de176e4d34a06940", + "urn": "urn:li:container:c8107a53ee221a15de176e4d34a06940" + }, + { + "id": "urn:li:container:f870c782e0a44727bd10da2ab742363b", + "urn": "urn:li:container:f870c782e0a44727bd10da2ab742363b" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hana-test", + "lastRunId": "no-run-id-provided" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hana,hotel.maintenance,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:f870c782e0a44727bd10da2ab742363b\"}", - "contentType": "application/json" + "json": { + "container": "urn:li:container:f870c782e0a44727bd10da2ab742363b" + } }, "systemMetadata": { "lastObserved": 1586847600000, "runId": "hana-test", - "registryName": null, - "registryVersion": null, - "properties": null + "lastRunId": "no-run-id-provided" } 
}, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:hana,hotel.maintenance,PROD)", @@ -625,11 +620,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": {}, - "externalUrl": null, "name": "maintenance", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } }, @@ -640,17 +631,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.MySqlDDL": { @@ -660,205 +646,811 @@ "fields": [ { "fieldPath": "mno", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": true, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": true }, { "fieldPath": "hno", - "jsonPath": null, "nullable": true, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "description", - "jsonPath": null, "nullable": true, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=100)", + "nativeDataType": "VARCHAR(100)", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - 
"isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "date_performed", - "jsonPath": null, "nullable": true, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "performed_by", - "jsonPath": null, "nullable": true, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=40)", + "nativeDataType": "VARCHAR(40)", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null + ] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, "runId": "hana-test", - "registryName": null, - "registryVersion": null, - "properties": null + "lastRunId": "no-run-id-provided" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hana,hotel.maintenance,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { - "value": "{\"typeNames\": [\"Table\"]}", - "contentType": "application/json" + "json": { + "typeNames": [ + "Table" + ] + } }, "systemMetadata": { "lastObserved": 1586847600000, "runId": "hana-test", - "registryName": null, - "registryVersion": null, - "properties": null + "lastRunId": "no-run-id-provided" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hana,hotel.maintenance,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "domains", "aspect": { - "value": "{\"domains\": [\"urn:li:domain:sales\"]}", - 
"contentType": "application/json" + "json": { + "domains": [ + "urn:li:domain:sales" + ] + } }, "systemMetadata": { "lastObserved": 1586847600000, "runId": "hana-test", - "registryName": null, - "registryVersion": null, - "properties": null + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hana,hotel.maintenance,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:c8107a53ee221a15de176e4d34a06940", + "urn": "urn:li:container:c8107a53ee221a15de176e4d34a06940" + }, + { + "id": "urn:li:container:f870c782e0a44727bd10da2ab742363b", + "urn": "urn:li:container:f870c782e0a44727bd10da2ab742363b" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hana-test", + "lastRunId": "no-run-id-provided" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hana,hotel.customer,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { - "value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 15, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"cno\", \"uniqueCount\": 15, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1000\", \"1001\", \"1002\", \"1003\", \"1004\", \"1005\", \"1006\", \"1007\", \"1008\", \"1009\", \"1010\", \"1011\", \"1012\", \"1013\", \"1014\"]}, {\"fieldPath\": \"title\", \"uniqueCount\": 3, \"uniqueProportion\": 0.2, \"nullCount\": 0, \"nullProportion\": 0.0, \"distinctValueFrequencies\": [{\"value\": \"Company\", \"frequency\": 2}, {\"value\": \"Mr\", \"frequency\": 7}, {\"value\": \"Mrs\", \"frequency\": 6}], \"sampleValues\": [\"Mrs\", \"Mr\", \"Company\", \"Mrs\", \"Mrs\", \"Mr\", \"Mrs\", \"Mr\", \"Mrs\", \"Mr\", \"Mr\", \"Mrs\", \"Mr\", 
\"Company\", \"Mr\"]}, {\"fieldPath\": \"firstname\", \"uniqueCount\": 13, \"uniqueProportion\": 1.0, \"nullCount\": 2, \"nullProportion\": 0.13333333333333333, \"sampleValues\": [\"Jenny\", \"Peter\", \"Rose\", \"Mary\", \"Martin\", \"Sally\", \"Mike\", \"Rita\", \"George\", \"Frank\", \"Susan\", \"Joseph\", \"Antony\"]}, {\"fieldPath\": \"name\", \"uniqueCount\": 15, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Porter\", \"Brown\", \"Datasoft\", \"Brian\", \"Griffith\", \"Randolph\", \"Smith\", \"Jackson\", \"Doe\", \"Howe\", \"Miller\", \"Baker\", \"Peters\", \"TOOLware\", \"Jenkins\"]}, {\"fieldPath\": \"address\", \"uniqueCount\": 15, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1340 N. Ash Street, #3\", \"1001 34th St., APT.3\", \"486 Maple St.\", \"500 Yellowstone Drive, #2\", \"3401 Elder Lane\", \"340 MAIN STREET, #7\", \"250 Curtis Street\", \"133 BROADWAY APT. 1\", \"2000 Humboldt St., #6\", \"111 B Parkway, #23\", \"27 5th St., 76\", \"200 MAIN STREET, #94\", \"700 S. 
Ash St., APT.12\", \"410 Mariposa St., #10\", \"55 A Parkway, #15\"]}, {\"fieldPath\": \"zip\", \"uniqueCount\": 12, \"uniqueProportion\": 0.8, \"nullCount\": 0, \"nullProportion\": 0.0, \"distinctValueFrequencies\": [{\"value\": \"10580\", \"frequency\": 1}, {\"value\": \"20005\", \"frequency\": 1}, {\"value\": \"20019\", \"frequency\": 1}, {\"value\": \"20903\", \"frequency\": 1}, {\"value\": \"45211\", \"frequency\": 1}, {\"value\": \"48226\", \"frequency\": 1}, {\"value\": \"60615\", \"frequency\": 1}, {\"value\": \"75243\", \"frequency\": 3}, {\"value\": \"90018\", \"frequency\": 2}, {\"value\": \"92714\", \"frequency\": 1}, {\"value\": \"95054\", \"frequency\": 1}, {\"value\": \"97213\", \"frequency\": 1}], \"sampleValues\": [\"10580\", \"48226\", \"90018\", \"75243\", \"20005\", \"60615\", \"75243\", \"45211\", \"97213\", \"75243\", \"95054\", \"90018\", \"92714\", \"20019\", \"20903\"]}]}", - "contentType": "application/json" + "json": { + "timestampMillis": 1586847600000, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "rowCount": 15, + "columnCount": 6, + "fieldProfiles": [ + { + "fieldPath": "cno", + "uniqueCount": 15, + "uniqueProportion": 1, + "nullCount": 0, + "nullProportion": 0.0, + "min": "1000", + "max": "1014", + "mean": "1007.0", + "median": "1007", + "stdev": "4.472135", + "sampleValues": [ + "1000", + "1001", + "1002", + "1003", + "1004", + "1005", + "1006", + "1007", + "1008", + "1009", + "1010", + "1011", + "1012", + "1013", + "1014" + ] + }, + { + "fieldPath": "title", + "uniqueCount": 3, + "uniqueProportion": 0.2, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "Company", + "frequency": 2 + }, + { + "value": "Mr", + "frequency": 7 + }, + { + "value": "Mrs", + "frequency": 6 + } + ], + "sampleValues": [ + "Mrs", + "Mr", + "Company", + "Mrs", + "Mrs", + "Mr", + "Mrs", + "Mr", + "Mrs", + "Mr", + "Mr", + "Mrs", + "Mr", + "Company", + "Mr" + ] + }, + { + 
"fieldPath": "firstname", + "uniqueCount": 13, + "uniqueProportion": 1, + "nullCount": 2, + "nullProportion": 0.13333333333333333, + "sampleValues": [ + "Jenny", + "Peter", + "Rose", + "Mary", + "Martin", + "Sally", + "Mike", + "Rita", + "George", + "Frank", + "Susan", + "Joseph", + "Antony" + ] + }, + { + "fieldPath": "name", + "uniqueCount": 15, + "uniqueProportion": 1, + "nullCount": 0, + "nullProportion": 0.0, + "sampleValues": [ + "Porter", + "Brown", + "Datasoft", + "Brian", + "Griffith", + "Randolph", + "Smith", + "Jackson", + "Doe", + "Howe", + "Miller", + "Baker", + "Peters", + "TOOLware", + "Jenkins" + ] + }, + { + "fieldPath": "address", + "uniqueCount": 15, + "uniqueProportion": 1, + "nullCount": 0, + "nullProportion": 0.0, + "sampleValues": [ + "1340 N. Ash Street, #3", + "1001 34th St., APT.3", + "486 Maple St.", + "500 Yellowstone Drive, #2", + "3401 Elder Lane", + "340 MAIN STREET, #7", + "250 Curtis Street", + "133 BROADWAY APT. 1", + "2000 Humboldt St., #6", + "111 B Parkway, #23", + "27 5th St., 76", + "200 MAIN STREET, #94", + "700 S. 
Ash St., APT.12", + "410 Mariposa St., #10", + "55 A Parkway, #15" + ] + }, + { + "fieldPath": "zip", + "uniqueCount": 12, + "uniqueProportion": 0.8, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "10580", + "frequency": 1 + }, + { + "value": "20005", + "frequency": 1 + }, + { + "value": "20019", + "frequency": 1 + }, + { + "value": "20903", + "frequency": 1 + }, + { + "value": "45211", + "frequency": 1 + }, + { + "value": "48226", + "frequency": 1 + }, + { + "value": "60615", + "frequency": 1 + }, + { + "value": "75243", + "frequency": 3 + }, + { + "value": "90018", + "frequency": 2 + }, + { + "value": "92714", + "frequency": 1 + }, + { + "value": "95054", + "frequency": 1 + }, + { + "value": "97213", + "frequency": 1 + } + ], + "sampleValues": [ + "10580", + "48226", + "90018", + "75243", + "20005", + "60615", + "75243", + "45211", + "97213", + "75243", + "95054", + "90018", + "92714", + "20019", + "20903" + ] + } + ] + } }, "systemMetadata": { "lastObserved": 1586847600000, "runId": "hana-test", - "registryName": null, - "registryVersion": null, - "properties": null + "lastRunId": "no-run-id-provided" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hana,hotel.hotel,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { - "value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 17, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"hno\", \"uniqueCount\": 17, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"10\", \"11\", \"12\", \"13\", \"14\", \"15\", \"16\", \"17\", \"18\", \"19\", \"20\", \"21\", \"22\", \"23\", \"24\", \"25\", \"26\"]}, {\"fieldPath\": \"name\", \"uniqueCount\": 17, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": 
[\"Congress\", \"Regency\", \"Long Island\", \"Empire State\", \"Midtown\", \"Eighth Avenue\", \"Lake Michigan\", \"Airport\", \"Sunshine\", \"Beach\", \"Atlantic\", \"Long Beach\", \"Indian Horse\", \"Star\", \"River Boat\", \"Ocean Star\", \"Bella Ciente\"]}, {\"fieldPath\": \"address\", \"uniqueCount\": 17, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"155 Beechwood St.\", \"477 17th Avenue\", \"1499 Grove Street\", \"65 Yellowstone Dr.\", \"12 Barnard St.\", \"112 8th Avenue\", \"354 OAK Terrace\", \"650 C Parkway\", \"200 Yellowstone Dr.\", \"1980 34th St.\", \"111 78th St.\", \"35 Broadway\", \"16 MAIN STREET\", \"13 Beechwood Place\", \"788 MAIN STREET\", \"45 Pacific Avenue\", \"1407 Marshall Ave\"]}, {\"fieldPath\": \"city\", \"uniqueCount\": 15, \"uniqueProportion\": 0.8823529411764706, \"nullCount\": 0, \"nullProportion\": 0.0, \"distinctValueFrequencies\": [{\"value\": \"Albany\", \"frequency\": 1}, {\"value\": \"Atlantic City\", \"frequency\": 1}, {\"value\": \"Chicago\", \"frequency\": 1}, {\"value\": \"Clearwater\", \"frequency\": 1}, {\"value\": \"Daytona Beach\", \"frequency\": 1}, {\"value\": \"Deerfield Beach\", \"frequency\": 1}, {\"value\": \"Hollywood\", \"frequency\": 1}, {\"value\": \"Long Beach\", \"frequency\": 1}, {\"value\": \"Long Island\", \"frequency\": 1}, {\"value\": \"Longview\", \"frequency\": 1}, {\"value\": \"New Orleans\", \"frequency\": 1}, {\"value\": \"New York\", \"frequency\": 2}, {\"value\": \"Palm Springs\", \"frequency\": 1}, {\"value\": \"Rosemont\", \"frequency\": 1}, {\"value\": \"Seattle\", \"frequency\": 2}], \"sampleValues\": [\"Seattle\", \"Seattle\", \"Long Island\", \"Albany\", \"New York\", \"New York\", \"Chicago\", \"Rosemont\", \"Clearwater\", \"Daytona Beach\", \"Deerfield Beach\", \"Long Beach\", \"Palm Springs\", \"Hollywood\", \"New Orleans\", \"Atlantic City\", \"Longview\"]}, {\"fieldPath\": \"state\", \"uniqueCount\": 8, \"uniqueProportion\": 
0.47058823529411764, \"nullCount\": 0, \"nullProportion\": 0.0, \"distinctValueFrequencies\": [{\"value\": \"CA\", \"frequency\": 3}, {\"value\": \"FL\", \"frequency\": 3}, {\"value\": \"IL\", \"frequency\": 2}, {\"value\": \"LA\", \"frequency\": 1}, {\"value\": \"NJ\", \"frequency\": 1}, {\"value\": \"NY\", \"frequency\": 4}, {\"value\": \"TX\", \"frequency\": 1}, {\"value\": \"WA\", \"frequency\": 2}], \"sampleValues\": [\"WA\", \"WA\", \"NY\", \"NY\", \"NY\", \"NY\", \"IL\", \"IL\", \"FL\", \"FL\", \"FL\", \"CA\", \"CA\", \"CA\", \"LA\", \"NJ\", \"TX\"]}, {\"fieldPath\": \"zip\", \"uniqueCount\": 16, \"uniqueProportion\": 0.9411764705882353, \"nullCount\": 0, \"nullProportion\": 0.0, \"distinctValueFrequencies\": [{\"value\": \"08401\", \"frequency\": 1}, {\"value\": \"10019\", \"frequency\": 2}, {\"value\": \"11788\", \"frequency\": 1}, {\"value\": \"12203\", \"frequency\": 1}, {\"value\": \"20005\", \"frequency\": 1}, {\"value\": \"20037\", \"frequency\": 1}, {\"value\": \"32018\", \"frequency\": 1}, {\"value\": \"33441\", \"frequency\": 1}, {\"value\": \"33575\", \"frequency\": 1}, {\"value\": \"60018\", \"frequency\": 1}, {\"value\": \"60601\", \"frequency\": 1}, {\"value\": \"70112\", \"frequency\": 1}, {\"value\": \"75601\", \"frequency\": 1}, {\"value\": \"90029\", \"frequency\": 1}, {\"value\": \"90804\", \"frequency\": 1}, {\"value\": \"92262\", \"frequency\": 1}], \"sampleValues\": [\"20005\", \"20037\", \"11788\", \"12203\", \"10019\", \"10019\", \"60601\", \"60018\", \"33575\", \"32018\", \"33441\", \"90804\", \"92262\", \"90029\", \"70112\", \"08401\", \"75601\"]}]}", - "contentType": "application/json" + "json": { + "timestampMillis": 1586847600000, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "rowCount": 17, + "columnCount": 6, + "fieldProfiles": [ + { + "fieldPath": "hno", + "uniqueCount": 17, + "uniqueProportion": 1, + "nullCount": 0, + "nullProportion": 0.0, + "min": "10", + "max": "26", + "mean": 
"18.0", + "median": "18", + "stdev": "5.049752", + "sampleValues": [ + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26" + ] + }, + { + "fieldPath": "name", + "uniqueCount": 17, + "uniqueProportion": 1, + "nullCount": 0, + "nullProportion": 0.0, + "sampleValues": [ + "Congress", + "Regency", + "Long Island", + "Empire State", + "Midtown", + "Eighth Avenue", + "Lake Michigan", + "Airport", + "Sunshine", + "Beach", + "Atlantic", + "Long Beach", + "Indian Horse", + "Star", + "River Boat", + "Ocean Star", + "Bella Ciente" + ] + }, + { + "fieldPath": "address", + "uniqueCount": 17, + "uniqueProportion": 1, + "nullCount": 0, + "nullProportion": 0.0, + "sampleValues": [ + "155 Beechwood St.", + "477 17th Avenue", + "1499 Grove Street", + "65 Yellowstone Dr.", + "12 Barnard St.", + "112 8th Avenue", + "354 OAK Terrace", + "650 C Parkway", + "200 Yellowstone Dr.", + "1980 34th St.", + "111 78th St.", + "35 Broadway", + "16 MAIN STREET", + "13 Beechwood Place", + "788 MAIN STREET", + "45 Pacific Avenue", + "1407 Marshall Ave" + ] + }, + { + "fieldPath": "city", + "uniqueCount": 15, + "uniqueProportion": 0.8823529411764706, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "Albany", + "frequency": 1 + }, + { + "value": "Atlantic City", + "frequency": 1 + }, + { + "value": "Chicago", + "frequency": 1 + }, + { + "value": "Clearwater", + "frequency": 1 + }, + { + "value": "Daytona Beach", + "frequency": 1 + }, + { + "value": "Deerfield Beach", + "frequency": 1 + }, + { + "value": "Hollywood", + "frequency": 1 + }, + { + "value": "Long Beach", + "frequency": 1 + }, + { + "value": "Long Island", + "frequency": 1 + }, + { + "value": "Longview", + "frequency": 1 + }, + { + "value": "New Orleans", + "frequency": 1 + }, + { + "value": "New York", + "frequency": 2 + }, + { + "value": "Palm Springs", + "frequency": 1 + }, + { + "value": "Rosemont", + "frequency": 1 
+ }, + { + "value": "Seattle", + "frequency": 2 + } + ], + "sampleValues": [ + "Seattle", + "Seattle", + "Long Island", + "Albany", + "New York", + "New York", + "Chicago", + "Rosemont", + "Clearwater", + "Daytona Beach", + "Deerfield Beach", + "Long Beach", + "Palm Springs", + "Hollywood", + "New Orleans", + "Atlantic City", + "Longview" + ] + }, + { + "fieldPath": "state", + "uniqueCount": 8, + "uniqueProportion": 0.47058823529411764, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "CA", + "frequency": 3 + }, + { + "value": "FL", + "frequency": 3 + }, + { + "value": "IL", + "frequency": 2 + }, + { + "value": "LA", + "frequency": 1 + }, + { + "value": "NJ", + "frequency": 1 + }, + { + "value": "NY", + "frequency": 4 + }, + { + "value": "TX", + "frequency": 1 + }, + { + "value": "WA", + "frequency": 2 + } + ], + "sampleValues": [ + "WA", + "WA", + "NY", + "NY", + "NY", + "NY", + "IL", + "IL", + "FL", + "FL", + "FL", + "CA", + "CA", + "CA", + "LA", + "NJ", + "TX" + ] + }, + { + "fieldPath": "zip", + "uniqueCount": 16, + "uniqueProportion": 0.9411764705882353, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "08401", + "frequency": 1 + }, + { + "value": "10019", + "frequency": 2 + }, + { + "value": "11788", + "frequency": 1 + }, + { + "value": "12203", + "frequency": 1 + }, + { + "value": "20005", + "frequency": 1 + }, + { + "value": "20037", + "frequency": 1 + }, + { + "value": "32018", + "frequency": 1 + }, + { + "value": "33441", + "frequency": 1 + }, + { + "value": "33575", + "frequency": 1 + }, + { + "value": "60018", + "frequency": 1 + }, + { + "value": "60601", + "frequency": 1 + }, + { + "value": "70112", + "frequency": 1 + }, + { + "value": "75601", + "frequency": 1 + }, + { + "value": "90029", + "frequency": 1 + }, + { + "value": "90804", + "frequency": 1 + }, + { + "value": "92262", + "frequency": 1 + } + ], + "sampleValues": [ + "20005", + "20037", + "11788", + "12203", + 
"10019", + "10019", + "60601", + "60018", + "33575", + "32018", + "33441", + "90804", + "92262", + "90029", + "70112", + "08401", + "75601" + ] + } + ] + } }, "systemMetadata": { "lastObserved": 1586847600000, "runId": "hana-test", - "registryName": null, - "registryVersion": null, - "properties": null + "lastRunId": "no-run-id-provided" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hana,hotel.maintenance,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { - "value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 3, \"columnCount\": 5, \"fieldProfiles\": [{\"fieldPath\": \"mno\", \"uniqueCount\": 3, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"10\", \"11\", \"12\"]}, {\"fieldPath\": \"hno\", \"uniqueCount\": 3, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"24\", \"25\", \"26\"]}, {\"fieldPath\": \"description\", \"uniqueCount\": 3, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Replace pool liner and pump\", \"Renovate the bar area. 
Replace TV and speakers\", \"Roof repair due to storm\"]}, {\"fieldPath\": \"date_performed\", \"uniqueCount\": 2, \"uniqueProportion\": 1.0, \"nullCount\": 1, \"nullProportion\": 0.3333333333333333, \"min\": \"2019-03-21\", \"max\": \"2020-11-29\", \"sampleValues\": [\"2019-03-21\", \"2020-11-29\"]}, {\"fieldPath\": \"performed_by\", \"uniqueCount\": 2, \"uniqueProportion\": 1.0, \"nullCount\": 1, \"nullProportion\": 0.3333333333333333, \"sampleValues\": [\"Discount Pool Supplies\", \"TV and Audio Superstore\"]}]}", - "contentType": "application/json" + "json": { + "timestampMillis": 1586847600000, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "rowCount": 3, + "columnCount": 5, + "fieldProfiles": [ + { + "fieldPath": "mno", + "uniqueCount": 3, + "uniqueProportion": 1, + "nullCount": 0, + "nullProportion": 0.0, + "min": "10", + "max": "12", + "mean": "11.0", + "median": "11", + "stdev": "1.0", + "sampleValues": [ + "10", + "11", + "12" + ] + }, + { + "fieldPath": "hno", + "uniqueCount": 3, + "uniqueProportion": 1, + "nullCount": 0, + "nullProportion": 0.0, + "min": "24", + "max": "26", + "mean": "25.0", + "median": "25", + "stdev": "1.0", + "sampleValues": [ + "24", + "25", + "26" + ] + }, + { + "fieldPath": "description", + "uniqueCount": 3, + "uniqueProportion": 1, + "nullCount": 0, + "nullProportion": 0.0, + "sampleValues": [ + "Replace pool liner and pump", + "Renovate the bar area. 
Replace TV and speakers", + "Roof repair due to storm" + ] + }, + { + "fieldPath": "date_performed", + "uniqueCount": 2, + "uniqueProportion": 1, + "nullCount": 1, + "nullProportion": 0.3333333333333333, + "min": "2019-03-21", + "max": "2020-11-29", + "sampleValues": [ + "2019-03-21", + "2020-11-29" + ] + }, + { + "fieldPath": "performed_by", + "uniqueCount": 2, + "uniqueProportion": 1, + "nullCount": 1, + "nullProportion": 0.3333333333333333, + "sampleValues": [ + "Discount Pool Supplies", + "TV and Audio Superstore" + ] + } + ] + } }, "systemMetadata": { "lastObserved": 1586847600000, "runId": "hana-test", - "registryName": null, - "registryVersion": null, - "properties": null + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json index 76c8f04e8447a7..9c0363e0892f0d 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json @@ -1,22 +1,4 @@ [ -{ - "entityType": "dataPlatformInstance", - "entityUrn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)", - "changeType": "UPSERT", - "aspectName": "dataPlatformInfo", - "aspect": { - "json": { - "name": "ap-south-1", - "type": "OTHERS", - "datasetNameDelimiter": "." 
- } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "looker-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7", @@ -805,22 +787,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataPlatformInstance", - "entityUrn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "looker-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml index 6eb92d749c9f7f..a87381dd0bf759 100644 --- a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml +++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml @@ -6,6 +6,9 @@ include: "employee_total_income.view.lkml" include: "top_10_employee_income_source.view.lkml" include: "employee_tax_report.view.lkml" include: "employee_salary_rating.view.lkml" +include: "environment_activity_logs.view.lkml" +include: "employee_income_source_as_per_env.view.lkml" +include: "rent_as_employee_income_source.view.lkml" explore: activity_logs { } @@ -23,4 +26,13 @@ explore: employee_tax_report { } explore: employee_salary_rating { +} + +explore: environment_activity_logs { +} + +explore: employee_income_source_as_per_env { +} + +explore: rent_as_employee_income_source { } \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/employee_income_source_as_per_env.view.lkml 
b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/employee_income_source_as_per_env.view.lkml new file mode 100644 index 00000000000000..4b8e0dd46a8ce3 --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/employee_income_source_as_per_env.view.lkml @@ -0,0 +1,40 @@ +view: employee_income_source_as_per_env { + derived_table: { + sql: SELECT + employee_id, + employee_name, + {% if dw_eff_dt_date._is_selected or finance_dw_eff_dt_date._is_selected %} + prod_core.data.r_metric_summary_v2 + {% elsif dw_eff_dt_week._is_selected or finance_dw_eff_dt_week._is_selected %} + prod_core.data.r_metric_summary_v3 + {% else %} + 'default_table' as source + {% endif %}, + employee_income + FROM -- if dev -- dev_income_source -- if prod -- prod_income_source + WHERE + {% condition source_region %} source_table.region {% endcondition %} + ;; + } + + dimension: id { + type: number + sql: ${TABLE}.employee_id;; + } + + dimension: name { + type: string + sql: ${TABLE}.employee_name;; + } + + dimension: source { + type: string + sql: ${TABLE}.source ;; + } + + dimension: income { + type: number + sql: ${TABLE}.employee_income ;; + } + +} diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/environment_activity_logs.view.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/environment_activity_logs.view.lkml new file mode 100644 index 00000000000000..efc7ba82754b88 --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/environment_activity_logs.view.lkml @@ -0,0 +1,12 @@ +view: environment_activity_logs { + sql_table_name: -- if prod -- prod.staging_app.stg_app__activity_logs + -- if dev -- {{ _user_attributes['dev_database_prefix'] }}analytics.{{ _user_attributes['dev_schema_prefix'] }}staging_app.stg_app__activity_logs + ;; + + dimension: generated_message_id { + group_label: "IDs" + primary_key: yes + type: 
number + sql: ${TABLE}."GENERATED_MESSAGE_ID" ;; + } +} diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/rent_as_employee_income_source.view.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/rent_as_employee_income_source.view.lkml new file mode 100644 index 00000000000000..40b6e3642f3b34 --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/rent_as_employee_income_source.view.lkml @@ -0,0 +1,27 @@ +view: rent_as_employee_income_source { + sql_table_name: ( + SELECT id, + name, + source + FROM ${employee_income_source.SQL_TABLE_NAME} + WHERE source = "RENT" + ORDER BY source desc + LIMIT 10 + );; + + + dimension: id { + type: number + sql: ${TABLE}.id ;; + } + + dimension: name { + type: string + sql: ${TABLE}.name ;; + } + + dimension: source { + type: string + sql: ${TABLE}.source ;; + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json b/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json index d12ced5e425066..b723aff080bc44 100644 --- a/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json +++ b/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json @@ -1580,6 +1580,720 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.environment_activity_logs,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.environment_activity_logs,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", 
+ "aspect": { + "json": { + "materialized": false, + "viewLogic": "view: environment_activity_logs {\n sql_table_name: -- if prod -- prod.staging_app.stg_app__activity_logs\n -- if dev -- {{ _user_attributes['dev_database_prefix'] }}analytics.{{ _user_attributes['dev_schema_prefix'] }}staging_app.stg_app__activity_logs\n ;;\n\n dimension: generated_message_id {\n group_label: \"IDs\"\n primary_key: yes\n type: number\n sql: ${TABLE}.\"GENERATED_MESSAGE_ID\" ;;\n }\n}\n", + "viewLanguage": "lookml" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.environment_activity_logs,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.environment_activity_logs,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Develop/lkml_samples/" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,prod.staging_app.stg_app__activity_logs,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,prod.staging_app.stg_app__activity_logs,PROD),generated_message_id)" + ], + 
"downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.environment_activity_logs,PROD),generated_message_id)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "environment_activity_logs", + "platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "generated_message_id", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "number", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": true + } + ], + "primaryKeys": [ + "generated_message_id" + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "looker.file.path": "environment_activity_logs.view.lkml", + "looker.model": "data" + }, + "name": "environment_activity_logs", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.environment_activity_logs,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + 
"entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source_as_per_env,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source_as_per_env,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "SELECT\n employee_id,\n employee_name,\n {% if dw_eff_dt_date._is_selected or finance_dw_eff_dt_date._is_selected %}\n prod_core.data.r_metric_summary_v2\n {% elsif dw_eff_dt_week._is_selected or finance_dw_eff_dt_week._is_selected %}\n prod_core.data.r_metric_summary_v3\n {% else %}\n 'default_table' as source\n {% endif %},\n employee_income\n FROM -- if dev -- dev_income_source -- if prod -- prod_income_source\n WHERE\n {% condition source_region %} source_table.region {% endcondition %}", + "viewLanguage": "sql" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source_as_per_env,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source_as_per_env,PROD)", + "aspects": [ + { + 
"com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Develop/lkml_samples/" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,prod_income_source,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,prod_income_source,PROD),employee_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source_as_per_env,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,prod_income_source,PROD),employee_name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source_as_per_env,PROD),name)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,prod_income_source,PROD),source)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source_as_per_env,PROD),source)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,prod_income_source,PROD),employee_income)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source_as_per_env,PROD),income)" + ], + "confidenceScore": 1.0 + 
} + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "employee_income_source_as_per_env", + "platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "id", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "number", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "source", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "income", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "number", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + } + ], + "primaryKeys": [] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "looker.file.path": "employee_income_source_as_per_env.view.lkml", + "looker.model": "data" + }, + 
"name": "employee_income_source_as_per_env", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source_as_per_env,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "view: rent_as_employee_income_source {\n sql_table_name: (\n SELECT id,\n name,\n source\n FROM ${employee_income_source.SQL_TABLE_NAME}\n WHERE source = \"RENT\"\n ORDER BY source desc\n LIMIT 10\n );;\n\n\n dimension: id {\n type: number\n sql: ${TABLE}.id ;;\n }\n\n dimension: name {\n type: string\n sql: ${TABLE}.name ;;\n }\n\n dimension: source {\n type: string\n sql: ${TABLE}.source ;;\n }\n}", + "viewLanguage": "lookml" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": 
"dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Develop/lkml_samples/" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD),name)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source,PROD),source)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD),source)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "rent_as_employee_income_source", + "platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "id", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "number", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "source", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + } + ], + "primaryKeys": [] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "looker.file.path": "rent_as_employee_income_source.view.lkml", + "looker.model": "data" + }, + 
"name": "rent_as_employee_income_source", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", diff --git a/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json b/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json index f78b45fb373fc6..bddccc856c842c 100644 --- a/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json +++ b/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json @@ -147,7 +147,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -159,7 +159,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -171,7 +171,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=14)", + "nativeDataType": "VARCHAR(14)", "recursive": false, "isPartOfKey": false }, @@ -183,7 +183,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=16)", + "nativeDataType": "VARCHAR(16)", "recursive": false, "isPartOfKey": false }, @@ -195,7 +195,7 @@ "com.linkedin.pegasus2avro.schema.EnumType": {} } }, - "nativeDataType": 
"ENUM('M', 'F')", + "nativeDataType": "ENUM('M','F')", "recursive": false, "glossaryTerms": { "terms": [ @@ -218,7 +218,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false } @@ -334,7 +334,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -346,7 +346,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -358,7 +358,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": true }, @@ -370,7 +370,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false } @@ -439,7 +439,6 @@ }, "rowCount": 10, "columnCount": 6, - "sizeInBytes": 16384, "fieldProfiles": [ { "fieldPath": "emp_no", @@ -574,7 +573,8 @@ "1989-08-24" ] } - ] + ], + "sizeInBytes": 16384 } }, "systemMetadata": { @@ -597,7 +597,6 @@ }, "rowCount": 112, "columnCount": 4, - "sizeInBytes": 16384, "fieldProfiles": [ { "fieldPath": "emp_no", @@ -991,7 +990,8 @@ "1993-02-09" ] } - ] + ], + "sizeInBytes": 16384 } }, "systemMetadata": { @@ -1166,7 +1166,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=500)", + "nativeDataType": "VARCHAR(500)", "recursive": false, "isPartOfKey": true }, @@ -1178,7 +1178,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=200)", + "nativeDataType": "VARCHAR(200)", "recursive": false, "isPartOfKey": true }, @@ -1190,7 +1190,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "BIGINT()", + "nativeDataType": "BIGINT", "recursive": false, 
"isPartOfKey": true }, @@ -1202,7 +1202,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "LONGTEXT()", + "nativeDataType": "LONGTEXT", "recursive": false, "isPartOfKey": false }, @@ -1214,7 +1214,7 @@ "com.linkedin.pegasus2avro.schema.TimeType": {} } }, - "nativeDataType": "DATETIME(fsp=6)", + "nativeDataType": "DATETIME(6)", "recursive": false, "isPartOfKey": false }, @@ -1226,7 +1226,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=255)", + "nativeDataType": "VARCHAR(255)", "recursive": false, "isPartOfKey": false }, @@ -1238,7 +1238,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=255)", + "nativeDataType": "VARCHAR(255)", "recursive": false, "isPartOfKey": false } @@ -1373,7 +1373,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "BIGINT()", + "nativeDataType": "BIGINT", "recursive": false, "isPartOfKey": true }, @@ -1386,7 +1386,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=200)", + "nativeDataType": "VARCHAR(200)", "recursive": false, "isPartOfKey": false }, @@ -1398,7 +1398,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=150)", + "nativeDataType": "VARCHAR(150)", "recursive": false, "isPartOfKey": false }, @@ -1410,7 +1410,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=150)", + "nativeDataType": "VARCHAR(150)", "recursive": false, "isPartOfKey": false }, @@ -1422,7 +1422,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "BIGINT()", + "nativeDataType": "BIGINT", "recursive": false, "isPartOfKey": false }, @@ -1434,7 +1434,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=200)", + "nativeDataType": "VARCHAR(200)", "recursive": false, "isPartOfKey": false }, @@ -1446,7 +1446,7 @@ 
"com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "DOUBLE(asdecimal=True)", + "nativeDataType": "DOUBLE", "recursive": false, "isPartOfKey": false } @@ -1583,7 +1583,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "BIGINT()", + "nativeDataType": "BIGINT", "recursive": false, "isPartOfKey": false }, @@ -1595,7 +1595,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=200)", + "nativeDataType": "VARCHAR(200)", "recursive": false, "isPartOfKey": false }, @@ -1607,7 +1607,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=150)", + "nativeDataType": "VARCHAR(150)", "recursive": false, "isPartOfKey": false }, @@ -1619,7 +1619,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "DOUBLE(asdecimal=True)", + "nativeDataType": "DOUBLE", "recursive": false, "isPartOfKey": false } @@ -1858,7 +1858,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -1870,7 +1870,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -1882,7 +1882,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -1894,7 +1894,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -1906,7 +1906,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "glossaryTerms": { "terms": [ @@ -1929,7 +1929,7 @@ 
"com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "FLOAT()", + "nativeDataType": "FLOAT", "recursive": false, "isPartOfKey": false } @@ -2045,7 +2045,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -2057,7 +2057,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -2069,7 +2069,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false } @@ -2150,7 +2150,6 @@ }, "rowCount": 5, "columnCount": 6, - "sizeInBytes": 16384, "fieldProfiles": [ { "fieldPath": "id", @@ -2259,7 +2258,8 @@ "3.8" ] } - ] + ], + "sizeInBytes": 16384 } }, "systemMetadata": { @@ -2282,7 +2282,6 @@ }, "rowCount": 0, "columnCount": 3, - "sizeInBytes": 16384, "fieldProfiles": [ { "fieldPath": "id", @@ -2299,7 +2298,8 @@ "uniqueCount": 0, "nullCount": 0 } - ] + ], + "sizeInBytes": 16384 } }, "systemMetadata": { @@ -2456,7 +2456,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "SET('a', 'b', 'c', 'd')", + "nativeDataType": "SET('a','b','c','d')", "recursive": false, "isPartOfKey": false } @@ -2572,7 +2572,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false } @@ -2641,14 +2641,14 @@ }, "rowCount": 0, "columnCount": 1, - "sizeInBytes": 16384, "fieldProfiles": [ { "fieldPath": "col", "uniqueCount": 0, "nullCount": 0 } - ] + ], + "sizeInBytes": 16384 } }, "systemMetadata": { @@ -2671,14 +2671,14 @@ }, "rowCount": 0, "columnCount": 1, - "sizeInBytes": 16384, "fieldProfiles": [ { "fieldPath": "dummy", "uniqueCount": 0, "nullCount": 0 } - ] + ], + "sizeInBytes": 16384 } }, 
"systemMetadata": { diff --git a/metadata-ingestion/tests/integration/mysql/mysql_mces_with_db_golden.json b/metadata-ingestion/tests/integration/mysql/mysql_mces_with_db_golden.json index 065d6cbe90b313..8c6f6338bc2b07 100644 --- a/metadata-ingestion/tests/integration/mysql/mysql_mces_with_db_golden.json +++ b/metadata-ingestion/tests/integration/mysql/mysql_mces_with_db_golden.json @@ -165,7 +165,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -177,7 +177,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -189,7 +189,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -201,7 +201,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -213,7 +213,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -225,7 +225,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "FLOAT()", + "nativeDataType": "FLOAT", "recursive": false, "isPartOfKey": false } @@ -359,7 +359,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -371,7 +371,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -383,7 +383,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - 
"nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false } diff --git a/metadata-ingestion/tests/integration/mysql/mysql_table_level_only.json b/metadata-ingestion/tests/integration/mysql/mysql_table_level_only.json index b8dfd7d9efc37d..3f5899aa8a98c3 100644 --- a/metadata-ingestion/tests/integration/mysql/mysql_table_level_only.json +++ b/metadata-ingestion/tests/integration/mysql/mysql_table_level_only.json @@ -147,7 +147,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -159,7 +159,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -171,7 +171,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -183,7 +183,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -195,7 +195,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -207,7 +207,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "FLOAT()", + "nativeDataType": "FLOAT", "recursive": false, "isPartOfKey": false } @@ -323,7 +323,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -335,7 +335,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -347,7 +347,7 
@@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false } @@ -461,63 +461,5 @@ "runId": "mysql-2020_04_14-07_00_00", "lastRunId": "no-run-id-provided" } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:dc2ae101b66746b9c2b6df8ee89ca88f", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-2020_04_14-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:dc2ae101b66746b9c2b6df8ee89ca88f", - "urn": "urn:li:container:dc2ae101b66746b9c2b6df8ee89ca88f" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-2020_04_14-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.orders,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:dc2ae101b66746b9c2b6df8ee89ca88f", - "urn": "urn:li:container:dc2ae101b66746b9c2b6df8ee89ca88f" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-2020_04_14-07_00_00", - "lastRunId": "no-run-id-provided" - } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/mysql/mysql_table_row_count_estimate_only.json b/metadata-ingestion/tests/integration/mysql/mysql_table_row_count_estimate_only.json index fc25af0400bb5d..58a70cae2b2d5e 100644 --- a/metadata-ingestion/tests/integration/mysql/mysql_table_row_count_estimate_only.json +++ 
b/metadata-ingestion/tests/integration/mysql/mysql_table_row_count_estimate_only.json @@ -147,7 +147,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -159,7 +159,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -171,7 +171,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -183,7 +183,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -195,7 +195,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -207,7 +207,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "FLOAT()", + "nativeDataType": "FLOAT", "recursive": false, "isPartOfKey": false } @@ -323,7 +323,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -335,7 +335,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -347,7 +347,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false } diff --git a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json index 
b740dfe025ef7f..6732b17d2e8322 100644 --- a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json +++ b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json @@ -258,7 +258,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "NUMBER(asdecimal=False)", + "nativeDataType": "NUMBER", "recursive": false, "isPartOfKey": true } @@ -380,7 +380,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "NUMBER(asdecimal=False)", + "nativeDataType": "NUMBER", "recursive": false, "isPartOfKey": true } @@ -505,7 +505,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "NUMBER(asdecimal=False)", + "nativeDataType": "NUMBER", "recursive": false, "isPartOfKey": false } @@ -754,7 +754,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "NUMBER(asdecimal=False)", + "nativeDataType": "NUMBER", "recursive": false, "isPartOfKey": true } @@ -876,7 +876,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "NUMBER(asdecimal=False)", + "nativeDataType": "NUMBER", "recursive": false, "isPartOfKey": true } @@ -1001,7 +1001,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "NUMBER(asdecimal=False)", + "nativeDataType": "NUMBER", "recursive": false, "isPartOfKey": false } @@ -1207,213 +1207,5 @@ "runId": "oracle-2022_02_03-07_00_00", "lastRunId": "no-run-id-provided" } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - 
"aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test1,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test2,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.view1,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "urn": 
"urn:li:container:c093e810646c7ebc493237bb24a3538f" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test3,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test4,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.view1,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" - } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json index 008cd405186c39..7610daaa54b4a1 100644 --- a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json +++ b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json @@ -258,7 +258,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "NUMBER(asdecimal=False)", + "nativeDataType": "NUMBER", "recursive": false, "isPartOfKey": true } @@ -380,7 +380,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "NUMBER(asdecimal=False)", + "nativeDataType": "NUMBER", "recursive": false, "isPartOfKey": true } @@ -505,7 +505,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "NUMBER(asdecimal=False)", + "nativeDataType": "NUMBER", "recursive": false, "isPartOfKey": false } @@ -754,7 +754,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "NUMBER(asdecimal=False)", + "nativeDataType": "NUMBER", "recursive": false, "isPartOfKey": true } @@ -876,7 +876,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "NUMBER(asdecimal=False)", + "nativeDataType": "NUMBER", "recursive": false, "isPartOfKey": true } 
@@ -1001,7 +1001,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "NUMBER(asdecimal=False)", + "nativeDataType": "NUMBER", "recursive": false, "isPartOfKey": false } @@ -1053,214 +1053,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.view1,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test1,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": 
"urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test2,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.view1,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": 
"oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test3,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test4,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.view1,PROD)", diff --git a/metadata-ingestion/tests/integration/oracle/test_oracle.py b/metadata-ingestion/tests/integration/oracle/test_oracle.py index 6c9aba8ec5620e..4541bb8ac65bff 100644 --- a/metadata-ingestion/tests/integration/oracle/test_oracle.py +++ b/metadata-ingestion/tests/integration/oracle/test_oracle.py @@ -24,6 +24,7 @@ def apply_mock_data(self, mock_create_engine, mock_inspect, mock_event): inspector_magic_mock.dialect.server_version_info = ( self.get_server_version_info() ) + inspector_magic_mock.dialect.type_compiler.process = lambda x: "NUMBER" 
mock_inspect.return_value = inspector_magic_mock mock_create_engine.connect.return_value = connection_magic_mock diff --git a/metadata-ingestion/tests/integration/postgres/postgres_all_db_mces_with_db_golden.json b/metadata-ingestion/tests/integration/postgres/postgres_all_db_mces_with_db_golden.json index f35ff9fdb9d153..f42ff7c0df068c 100644 --- a/metadata-ingestion/tests/integration/postgres/postgres_all_db_mces_with_db_golden.json +++ b/metadata-ingestion/tests/integration/postgres/postgres_all_db_mces_with_db_golden.json @@ -452,7 +452,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=500)", + "nativeDataType": "VARCHAR(500)", "recursive": false, "isPartOfKey": true }, @@ -464,7 +464,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=200)", + "nativeDataType": "VARCHAR(200)", "recursive": false, "isPartOfKey": true }, @@ -476,7 +476,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "BIGINT()", + "nativeDataType": "BIGINT", "recursive": false, "isPartOfKey": true }, @@ -488,7 +488,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "TEXT()", + "nativeDataType": "TEXT", "recursive": false, "isPartOfKey": false }, @@ -500,7 +500,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "TEXT()", + "nativeDataType": "TEXT", "recursive": false, "isPartOfKey": false }, @@ -512,7 +512,7 @@ "com.linkedin.pegasus2avro.schema.TimeType": {} } }, - "nativeDataType": "TIMESTAMP()", + "nativeDataType": "TIMESTAMP WITHOUT TIME ZONE", "recursive": false, "isPartOfKey": false }, @@ -524,7 +524,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=255)", + "nativeDataType": "VARCHAR(255)", "recursive": false, "isPartOfKey": false }, @@ -536,7 +536,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=255)", + "nativeDataType": 
"VARCHAR(255)", "recursive": false, "isPartOfKey": false }, @@ -548,7 +548,7 @@ "com.linkedin.pegasus2avro.schema.RecordType": {} } }, - "nativeDataType": "JSON(astext_type=Text())", + "nativeDataType": "JSON", "recursive": false, "isPartOfKey": false } @@ -671,7 +671,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=500)", + "nativeDataType": "VARCHAR(500)", "recursive": false, "isPartOfKey": false }, @@ -683,7 +683,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=200)", + "nativeDataType": "VARCHAR(200)", "recursive": false, "isPartOfKey": false } diff --git a/metadata-ingestion/tests/integration/postgres/postgres_mces_with_db_golden.json b/metadata-ingestion/tests/integration/postgres/postgres_mces_with_db_golden.json index f47789fc470cd8..f107fb1006bf6e 100644 --- a/metadata-ingestion/tests/integration/postgres/postgres_mces_with_db_golden.json +++ b/metadata-ingestion/tests/integration/postgres/postgres_mces_with_db_golden.json @@ -256,7 +256,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=500)", + "nativeDataType": "VARCHAR(500)", "recursive": false, "glossaryTerms": { "terms": [ @@ -279,7 +279,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=200)", + "nativeDataType": "VARCHAR(200)", "recursive": false, "isPartOfKey": true }, @@ -291,7 +291,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "BIGINT()", + "nativeDataType": "BIGINT", "recursive": false, "isPartOfKey": true }, @@ -303,7 +303,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "TEXT()", + "nativeDataType": "TEXT", "recursive": false, "isPartOfKey": false }, @@ -315,7 +315,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "TEXT()", + "nativeDataType": "TEXT", "recursive": false, "isPartOfKey": false }, @@ -327,7 +327,7 @@ 
"com.linkedin.pegasus2avro.schema.TimeType": {} } }, - "nativeDataType": "TIMESTAMP()", + "nativeDataType": "TIMESTAMP WITHOUT TIME ZONE", "recursive": false, "isPartOfKey": false }, @@ -339,7 +339,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=255)", + "nativeDataType": "VARCHAR(255)", "recursive": false, "glossaryTerms": { "terms": [ @@ -362,7 +362,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=255)", + "nativeDataType": "VARCHAR(255)", "recursive": false, "isPartOfKey": false }, @@ -374,7 +374,7 @@ "com.linkedin.pegasus2avro.schema.RecordType": {} } }, - "nativeDataType": "JSON(astext_type=Text())", + "nativeDataType": "JSON", "recursive": false, "isPartOfKey": false } @@ -497,7 +497,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=500)", + "nativeDataType": "VARCHAR(500)", "recursive": false, "isPartOfKey": false }, @@ -509,7 +509,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=200)", + "nativeDataType": "VARCHAR(200)", "recursive": false, "isPartOfKey": false } diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json b/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json index d80aa02c4cb123..a4eb670a4b7f94 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json @@ -1,7 +1,7 @@ [ { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.public_issue_history,PROD)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -17,9 +17,25 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": 
"corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "changeType": "UPSERT", + "aspectName": "corpUserKey", + "aspect": { + "json": { + "username": "User1@foo.com" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.public_issue_history,PROD)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -40,13 +56,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "corpUserKey", "aspect": { "json": { - "removed": false + "username": "User2@foo.com" } }, "systemMetadata": { @@ -57,15 +73,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_testtable,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "viewProperties", "aspect": { "json": { - "typeNames": [ - "PowerBI Dataset Table", - "View" - ] + "materialized": false, + "viewLogic": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -76,14 +91,12 @@ 
}, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_testtable,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.public_issue_history,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -94,7 +107,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_testtable,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_testtable,PROD)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -115,8 +128,8 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_testtable,DEV)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -132,15 +145,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_testtable,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_testtable,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "PowerBI Dataset Table", - "View" - ] + "removed": false } }, "systemMetadata": { @@ -151,14 
+161,15 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.public_issue_history,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"", - "viewLanguage": "m_query" + "typeNames": [ + "PowerBI Dataset Table", + "View" + ] } }, "systemMetadata": { @@ -169,18 +180,15 @@ 
}, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_testtable,PROD)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "subTypes", "aspect": { "json": { - "customProperties": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "snowflake native-query", - "description": "Library dataset description", - "tags": [] + "typeNames": [ + "PowerBI Dataset Table", + "View" + ] } }, "systemMetadata": { @@ -190,8 +198,8 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -207,15 +215,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.big-query-with-parameter,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "viewProperties", "aspect": { "json": { - "typeNames": [ - "PowerBI Dataset Table", - "View" - ] + "materialized": false, + "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" 
= Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -226,13 +233,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query,PROD)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { "json": { "materialized": false, - "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", + "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional 
Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"", "viewLanguage": "m_query" } }, @@ -244,7 +251,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.big-query-with-parameter,PROD)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -266,23 +273,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.job-history,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -301,14 +292,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.big-query-with-parameter,PROD)", 
"changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -319,7 +308,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query,PROD)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -328,7 +317,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "snowflake native-query-with-join", + "name": "snowflake native-query", "description": "Library dataset description", "tags": [] } @@ -341,12 +330,15 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.big-query-with-parameter,PROD)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "PowerBI Dataset Table", + "View" + ] } }, "systemMetadata": { @@ -357,15 +349,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.job-history,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "PowerBI Dataset Table", - "View" - ] + "removed": false } }, "systemMetadata": { @@ -376,14 +365,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -394,7 +381,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.job-history,PROD)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -416,12 +403,33 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query,PROD)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "PowerBI Dataset Table", + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.job-history,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -432,7 +440,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query-with-join,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -451,14 +459,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query-with-join,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -469,7 +475,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query-with-join,PROD)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -478,7 +484,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": 
"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "postgres_test_table", + "name": "snowflake native-query-with-join", "description": "Library dataset description", "tags": [] } @@ -491,12 +497,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query-with-join,PROD)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "viewProperties", "aspect": { "json": { - "removed": false + "materialized": false, + "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -507,15 +515,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.postgres_test_table,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "viewProperties", "aspect": { "json": { - "typeNames": [ - "PowerBI Dataset Table", - "View" - ] + "materialized": false, + "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -525,15 +532,52 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + 
"entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "chartInfo", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue", - "viewLanguage": "m_query" + "customProperties": { + "createdFrom": "Dataset", + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", + "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details" + }, + "title": "test_tile", + "description": "test_tile", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.public_issue_history,PROD)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_testtable,PROD)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query,PROD)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.big-query-with-parameter,PROD)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query-with-join,PROD)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.job-history,PROD)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.postgres_test_table,PROD)" + } + ] } }, "systemMetadata": { @@ -544,17 +588,17 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.postgres_test_table,PROD)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { "json": { "customProperties": { - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", - "name": "dbo_book_issue", - "description": "hr pbi test description", + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "postgres_test_table", + "description": "Library dataset description", "tags": [] } }, @@ -565,13 +609,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "PowerBI Tile" + ] } }, "systemMetadata": { @@ -582,15 +628,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.postgres_test_table,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "PowerBI Dataset Table", - "View" - ] + "removed": false } }, "systemMetadata": { @@ -600,15 +643,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "entityType": "chart", + "entityUrn": 
"urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "browsePaths", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"", - "viewLanguage": "m_query" + "paths": [ + "/powerbi/demo-workspace" + ] } }, "systemMetadata": { @@ -619,18 +662,15 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.postgres_test_table,PROD)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "subTypes", "aspect": { "json": { - "customProperties": { - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", - "name": "ms_sql_native_table", - 
"description": "hr pbi test description", - "tags": [] + "typeNames": [ + "PowerBI Dataset Table", + "View" + ] } }, "systemMetadata": { @@ -640,8 +680,49 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "changeType": "UPSERT", + "aspectName": "chartKey", + "aspect": { + "json": { + "dashboardTool": "powerbi", + "chartId": "myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,myPlatformInstance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,myPlatformInstance)" + }, + { + "id": "demo-workspace" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -657,7 +738,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.dbo_book_issue,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -675,13 +756,13 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.dbo_book_issue,PROD)", "changeType": "UPSERT", - "aspectName": "corpUserKey", + "aspectName": "status", "aspect": { "json": { - "username": "User1@foo.com" + "removed": false } }, "systemMetadata": { @@ -691,13 +772,19 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.dbo_book_issue,PROD)", "changeType": "UPSERT", - "aspectName": "corpUserKey", + "aspectName": "datasetProperties", "aspect": { "json": { - "username": "User2@foo.com" + "customProperties": { + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", + "name": "dbo_book_issue", + "description": "hr pbi test description", + "tags": [] } }, "systemMetadata": { @@ -707,51 +794,33 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.dbo_book_issue,PROD)", "changeType": "UPSERT", - "aspectName": "chartInfo", + "aspectName": "viewProperties", "aspect": { "json": { - "customProperties": { - "createdFrom": "Dataset", - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", - "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details" - }, - "title": "test_tile", - "description": "test_tile", - "lastModified": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - }, - "inputs": [ - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)" - }, - { - "string": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_testtable,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)" - } + "materialized": false, + "viewLogic": "let\n Source = Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.ms_sql_native_table,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "PowerBI Dataset Table", + "View" ] } }, @@ -762,8 +831,8 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.ms_sql_native_table,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -778,15 +847,19 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.ms_sql_native_table,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": 
"datasetProperties", "aspect": { "json": { - "typeNames": [ - "PowerBI Tile" - ] + "customProperties": { + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", + "name": "ms_sql_native_table", + "description": "hr pbi test description", + "tags": [] } }, "systemMetadata": { @@ -796,14 +869,15 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.ms_sql_native_table,PROD)", "changeType": "UPSERT", - "aspectName": "chartKey", + "aspectName": "viewProperties", "aspect": { "json": { - "dashboardTool": "powerbi", - "chartId": "charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0" + "materialized": false, + "viewLogic": "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"", + "viewLanguage": "m_query" } }, 
"systemMetadata": { @@ -813,15 +887,27 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,myPlatformInstance.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "ownership", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" - ] + "owners": [ + { + "owner": "urn:li:corpuser:users.User1@foo.com", + "type": "NONE" + }, + { + "owner": "urn:li:corpuser:users.User2@foo.com", + "type": "NONE" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } } }, "systemMetadata": { @@ -831,17 +917,14 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,myPlatformInstance.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "dashboardKey", "aspect": { "json": { - "path": [ - { - "id": "demo-workspace" - } - ] + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE" } }, "systemMetadata": { @@ -852,7 +935,7 @@ }, { "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", "aspectName": "chartInfo", "aspect": { @@ -876,10 +959,10 @@ }, "inputs": [ { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)" + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.dbo_book_issue,PROD)" }, { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)" + "string": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.ms_sql_native_table,PROD)" } ] } @@ -892,7 +975,7 @@ }, { "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -907,43 +990,8 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", - "changeType": "UPSERT", - "aspectName": "chartKey", - "aspect": { - "json": { - "dashboardTool": "powerbi", - "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "PowerBI Tile" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,myPlatformInstance.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", "aspectName": "browsePaths", "aspect": { @@ -961,15 +1009,13 @@ }, { "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "subTypes", "aspect": { "json": { - "path": [ - { - "id": "demo-workspace" - } + "typeNames": [ + "PowerBI Tile" ] } }, @@ -980,15 +1026,14 @@ } }, { - 
"entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "chartKey", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" - ] + "dashboardTool": "powerbi", + "chartId": "myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385" } }, "systemMetadata": { @@ -999,7 +1044,7 @@ }, { "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityUrn": "urn:li:dashboard:(powerbi,myPlatformInstance.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "PATCH", "aspectName": "dashboardInfo", "aspect": { @@ -1031,13 +1076,13 @@ }, { "op": "add", - "path": "/charts/urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", - "value": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)" + "path": "/charts/urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "value": "urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)" }, { "op": "add", - "path": "/charts/urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", - "value": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)" + "path": "/charts/urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "value": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)" }, { "op": "add", @@ -1067,30 +1112,21 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - 
"runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "dashboardKey", + "aspectName": "browsePathsV2", "aspect": { "json": { - "dashboardTool": "powerbi", - "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE" + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,myPlatformInstance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,myPlatformInstance)" + }, + { + "id": "demo-workspace" + } + ] } }, "systemMetadata": { @@ -1100,27 +1136,15 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "ownership", + "aspectName": "browsePaths", "aspect": { "json": { - "owners": [ - { - "owner": "urn:li:corpuser:users.User1@foo.com", - "type": "NONE" - }, - { - "owner": "urn:li:corpuser:users.User2@foo.com", - "type": "NONE" - } - ], - "ownerTypes": {}, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } + "paths": [ + "/powerbi/demo-workspace" + ] } }, "systemMetadata": { @@ -1131,12 +1155,16 @@ }, { "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityUrn": "urn:li:dashboard:(powerbi,myPlatformInstance.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,myPlatformInstance)", + "urn": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,myPlatformInstance)" + }, { "id": "demo-workspace" } @@ -1150,24 +1178,8 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User1@foo.com", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,myPlatformInstance.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", "aspectName": "status", "aspect": { diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index 6a95ec2c1dda42..23b23ecada0d49 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -819,6 +819,8 @@ def test_powerbi_ingest_urn_lower_case( "type": "powerbi", "config": { **default_source_config(), + "env": "PROD", + "platform_instance": "myPlatformInstance", "convert_urns_to_lowercase": True, "convert_lineage_urns_to_lowercase": True, }, diff --git a/metadata-ingestion/tests/integration/salesforce/test_salesforce.py b/metadata-ingestion/tests/integration/salesforce/test_salesforce.py index 8b6b883b2148d2..89a37a372df843 100644 --- a/metadata-ingestion/tests/integration/salesforce/test_salesforce.py +++ b/metadata-ingestion/tests/integration/salesforce/test_salesforce.py @@ -1,10 +1,12 @@ import json import pathlib from unittest import mock +from unittest.mock import Mock from freezegun import freeze_time from datahub.ingestion.run.pipeline import Pipeline +from datahub.ingestion.source.salesforce import SalesforceConfig, SalesforceSource from tests.test_helpers import mce_helpers FROZEN_TIME = "2022-05-12 
11:00:00" @@ -19,15 +21,16 @@ def _read_response(file_name: str) -> dict: return data -def side_effect_call_salesforce(type, url): - class MockResponse: - def __init__(self, json_data, status_code): - self.json_data = json_data - self.status_code = status_code +class MockResponse: + def __init__(self, json_data, status_code): + self.json_data = json_data + self.status_code = status_code + + def json(self): + return self.json_data - def json(self): - return self.json_data +def side_effect_call_salesforce(type, url): if url.endswith("/services/data/"): return MockResponse(_read_response("versions_response.json"), 200) if url.endswith("FROM EntityDefinition WHERE IsCustomizable = true"): @@ -55,9 +58,92 @@ def json(self): return MockResponse({}, 404) +@mock.patch("datahub.ingestion.source.salesforce.Salesforce") +def test_latest_version(mock_sdk): + mock_sf = mock.Mock() + mocked_call = mock.Mock() + mocked_call.side_effect = side_effect_call_salesforce + mock_sf._call_salesforce = mocked_call + mock_sdk.return_value = mock_sf + + config = SalesforceConfig.parse_obj( + { + "auth": "DIRECT_ACCESS_TOKEN", + "instance_url": "https://mydomain.my.salesforce.com/", + "access_token": "access_token`", + "ingest_tags": True, + "object_pattern": { + "allow": [ + "^Account$", + "^Property__c$", + ], + }, + "domain": {"sales": {"allow": {"^Property__c$"}}}, + "profiling": {"enabled": True}, + "profile_pattern": { + "allow": [ + "^Property__c$", + ] + }, + } + ) + SalesforceSource(config=config, ctx=Mock()) + calls = mock_sf._call_salesforce.mock_calls + assert ( + len(calls) == 1 + ), "We didn't specify version but source didn't call SF API to get the latest one" + assert calls[0].ends_with( + "/services/data" + ), "Source didn't call proper SF API endpoint to get all versions" + assert ( + mock_sf.sf_version == "54.0" + ), "API version was not correctly set (see versions_responses.json)" + + +@mock.patch("datahub.ingestion.source.salesforce.Salesforce") +def 
test_custom_version(mock_sdk): + mock_sf = mock.Mock() + mocked_call = mock.Mock() + mocked_call.side_effect = side_effect_call_salesforce + mock_sf._call_salesforce = mocked_call + mock_sdk.return_value = mock_sf + + config = SalesforceConfig.parse_obj( + { + "auth": "DIRECT_ACCESS_TOKEN", + "api_version": "46.0", + "instance_url": "https://mydomain.my.salesforce.com/", + "access_token": "access_token`", + "ingest_tags": True, + "object_pattern": { + "allow": [ + "^Account$", + "^Property__c$", + ], + }, + "domain": {"sales": {"allow": {"^Property__c$"}}}, + "profiling": {"enabled": True}, + "profile_pattern": { + "allow": [ + "^Property__c$", + ] + }, + } + ) + SalesforceSource(config=config, ctx=Mock()) + + calls = mock_sf._call_salesforce.mock_calls + assert ( + len(calls) == 0 + ), "Source called API to get all versions even though we specified proper version" + assert ( + mock_sdk.call_args.kwargs["version"] == "46.0" + ), "API client object was not correctly initialized with the custom version" + + @freeze_time(FROZEN_TIME) def test_salesforce_ingest(pytestconfig, tmp_path): - with mock.patch("simple_salesforce.Salesforce") as mock_sdk: + with mock.patch("datahub.ingestion.source.salesforce.Salesforce") as mock_sdk: mock_sf = mock.Mock() mocked_call = mock.Mock() mocked_call.side_effect = side_effect_call_salesforce diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json index 4c0c1c6512ec77..74cb216117bd43 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json @@ -112,11 +112,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "0565425f-2083-45d3-bb61-76e0ee5e1117", + "job_id": "c6fb6778-14f1-4516-bb41-e5eaa97a687b", "job_name": "Weekly 
Demo Data Backup", "description": "No description available.", - "date_created": "2024-01-19 11:45:06.667000", - "date_modified": "2024-01-19 11:45:06.840000", + "date_created": "2024-07-27 23:58:29.780000", + "date_modified": "2024-07-27 23:58:29.943000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -1304,7 +1304,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1316,7 +1316,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "NVARCHAR()", + "nativeDataType": "NVARCHAR(max) COLLATE SQL_Latin1_General_CP1_CI_AS", "recursive": false, "isPartOfKey": false } @@ -1546,7 +1546,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1558,7 +1558,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "NVARCHAR()", + "nativeDataType": "NVARCHAR(max) COLLATE SQL_Latin1_General_CP1_CI_AS", "recursive": false, "isPartOfKey": false } @@ -1678,7 +1678,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -1691,7 +1691,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=255, collation='SQL_Latin1_General_CP1_CI_AS')", + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", "recursive": false, "isPartOfKey": false }, @@ -1703,7 +1703,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=255, collation='SQL_Latin1_General_CP1_CI_AS')", + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", "recursive": false, "isPartOfKey": false }, @@ -1715,7 +1715,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - 
"nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false } @@ -1835,7 +1835,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -1847,7 +1847,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "UNIQUEIDENTIFIER()", + "nativeDataType": "UNIQUEIDENTIFIER", "recursive": false, "isPartOfKey": false }, @@ -1859,7 +1859,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "NVARCHAR(length=50)", + "nativeDataType": "NVARCHAR(50) COLLATE SQL_Latin1_General_CP1_CI_AS", "recursive": false, "isPartOfKey": false } @@ -1961,8 +1961,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-01-19 11:45:06.590000", - "date_modified": "2024-01-19 11:45:06.590000" + "date_created": "2024-07-27 23:58:29.703000", + "date_modified": "2024-07-27 23:58:29.703000" }, "externalUrl": "", "name": "demodata.Foo.Proc.With.SpecialChar", @@ -3560,7 +3560,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -3572,7 +3572,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "NVARCHAR()", + "nativeDataType": "NVARCHAR(max) COLLATE SQL_Latin1_General_CP1_CI_AS", "recursive": false, "isPartOfKey": false }, @@ -3584,7 +3584,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "MONEY()", + "nativeDataType": "MONEY", "recursive": false, "isPartOfKey": false } @@ -3813,7 +3813,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -3825,7 +3825,7 @@ 
"com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "NVARCHAR()", + "nativeDataType": "NVARCHAR(max) COLLATE SQL_Latin1_General_CP1_CI_AS", "recursive": false, "isPartOfKey": false }, @@ -3837,7 +3837,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "SMALLMONEY()", + "nativeDataType": "SMALLMONEY", "recursive": false, "isPartOfKey": false } @@ -3957,7 +3957,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -3969,7 +3969,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=255, collation='SQL_Latin1_General_CP1_CI_AS')", + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", "recursive": false, "isPartOfKey": false }, @@ -3981,7 +3981,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=255, collation='SQL_Latin1_General_CP1_CI_AS')", + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", "recursive": false, "isPartOfKey": false }, @@ -3993,7 +3993,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false } @@ -4442,800 +4442,5 @@ "runId": "mssql-test", "lastRunId": "no-run-id-provided" } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f1b4c0e379c4b2e2e09a8ecd6c1b6dec", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": 
"urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bad84e08ecf49aee863df68243d8b9d0", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:e48d82445eeacfbe13b431f0bb1826ee", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:884bfecd9e414990a494681293413e8e", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:142ca5fc51b7f44e5e6a424bf1043590", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - 
"systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:1b9d125d390447de36719bfb8dd1f782", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:fcd4c8da3739150766f91e7f6c2a3a30", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:2029cab615b3cd82cb87b153957d2e92", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:556e25ccec98892284f017f870ef7809", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": 
"container", - "entityUrn": "urn:li:container:d41a036a2e6cfa44b834edf7683199ec", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.dbo.Products,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - }, - { - "id": "urn:li:container:d41a036a2e6cfa44b834edf7683199ec", - "urn": "urn:li:container:d41a036a2e6cfa44b834edf7683199ec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Items,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - }, - { - "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", - "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671" - } - ] 
- } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Persons,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - }, - { - "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", - "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.SalesReason,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - }, - { - "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", - "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a6bea84fba7b05fb5d12630c8e6306ac", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:9f37bb7baa7ded19cc023e9f644a8cf8", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - 
"json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:3f157d8292fb473142f19e2250af537f", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:47217386c89d8b94153f6ee31e7e77ec", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59", - "urn": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:5eb0d61efa998d1ccd5cbdc6ce4bb4af", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59", - "urn": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": 
"no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:2816b2cb7f90d3dce64125ba89fb1fa8", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59", - "urn": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:20d0f0c94e9796ff44ff32d4d0e19084", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59", - "urn": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:3600d2ebb33b25dac607624d7eae7575", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59", - "urn": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:280f2e3aefacc346d0ce1590ec337c7d", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59", - "urn": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:cba5c3ca7f028fcf749593be369d3c24", - 
"changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59", - "urn": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:58c30fa72f213ca7e12fb04f5a7d150f", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59", - "urn": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:9387ddfeb7b57672cabd761ade89c49c", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59", - "urn": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:3a5f70e0e34834d4eeeb4d5a5caf03d0", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59", - "urn": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.dbo.ProductsNew,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": 
"urn:li:container:9447d283fb4f95ce7474f1db0179bb59", - "urn": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59" - }, - { - "id": "urn:li:container:3a5f70e0e34834d4eeeb4d5a5caf03d0", - "urn": "urn:li:container:3a5f70e0e34834d4eeeb4d5a5caf03d0" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:7cc43e5b4e2a7f2f66f1df774d1a0c63", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59", - "urn": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.ItemsNew,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59", - "urn": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59" - }, - { - "id": "urn:li:container:7cc43e5b4e2a7f2f66f1df774d1a0c63", - "urn": "urn:li:container:7cc43e5b4e2a7f2f66f1df774d1a0c63" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.PersonsNew,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59", - "urn": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59" - }, - { - "id": "urn:li:container:7cc43e5b4e2a7f2f66f1df774d1a0c63", - "urn": "urn:li:container:7cc43e5b4e2a7f2f66f1df774d1a0c63" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 
1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:54727d9fd7deacef27641559125bbc56", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59", - "urn": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:141b0980dcb08f48544583e47cf48807", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59", - "urn": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c6627af82d44de89492e1a9315ae9f4b", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59", - "urn": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json index 02c357259c3f53..e1af3f72a8af1f 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json @@ -112,11 
+112,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "0565425f-2083-45d3-bb61-76e0ee5e1117", + "job_id": "c6fb6778-14f1-4516-bb41-e5eaa97a687b", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-01-19 11:45:06.667000", - "date_modified": "2024-01-19 11:45:06.840000", + "date_created": "2024-07-27 23:58:29.780000", + "date_modified": "2024-07-27 23:58:29.943000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -1304,7 +1304,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1316,7 +1316,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "NVARCHAR()", + "nativeDataType": "NVARCHAR(max) COLLATE SQL_Latin1_General_CP1_CI_AS", "recursive": false, "isPartOfKey": false } @@ -1546,7 +1546,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1558,7 +1558,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "NVARCHAR()", + "nativeDataType": "NVARCHAR(max) COLLATE SQL_Latin1_General_CP1_CI_AS", "recursive": false, "isPartOfKey": false } @@ -1678,7 +1678,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -1691,7 +1691,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=255, collation='SQL_Latin1_General_CP1_CI_AS')", + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", "recursive": false, "isPartOfKey": false }, @@ -1703,7 +1703,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=255, collation='SQL_Latin1_General_CP1_CI_AS')", + "nativeDataType": "VARCHAR(255) 
COLLATE SQL_Latin1_General_CP1_CI_AS", "recursive": false, "isPartOfKey": false }, @@ -1715,7 +1715,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false } @@ -1835,7 +1835,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -1847,7 +1847,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "UNIQUEIDENTIFIER()", + "nativeDataType": "UNIQUEIDENTIFIER", "recursive": false, "isPartOfKey": false }, @@ -1859,7 +1859,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "NVARCHAR(length=50)", + "nativeDataType": "NVARCHAR(50) COLLATE SQL_Latin1_General_CP1_CI_AS", "recursive": false, "isPartOfKey": false } @@ -1961,8 +1961,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-01-19 11:45:06.590000", - "date_modified": "2024-01-19 11:45:06.590000" + "date_created": "2024-07-27 23:58:29.703000", + "date_modified": "2024-07-27 23:58:29.703000" }, "externalUrl": "", "name": "demodata.Foo.Proc.With.SpecialChar", @@ -2385,415 +2385,5 @@ "runId": "mssql-test", "lastRunId": "no-run-id-provided" } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f1b4c0e379c4b2e2e09a8ecd6c1b6dec", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": 
"urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bad84e08ecf49aee863df68243d8b9d0", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:e48d82445eeacfbe13b431f0bb1826ee", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:884bfecd9e414990a494681293413e8e", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:142ca5fc51b7f44e5e6a424bf1043590", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - 
"systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:1b9d125d390447de36719bfb8dd1f782", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:fcd4c8da3739150766f91e7f6c2a3a30", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:2029cab615b3cd82cb87b153957d2e92", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:556e25ccec98892284f017f870ef7809", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": 
"container", - "entityUrn": "urn:li:container:d41a036a2e6cfa44b834edf7683199ec", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.dbo.Products,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - }, - { - "id": "urn:li:container:d41a036a2e6cfa44b834edf7683199ec", - "urn": "urn:li:container:d41a036a2e6cfa44b834edf7683199ec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Items,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - }, - { - "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", - "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671" - } - ] 
- } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Persons,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - }, - { - "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", - "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.SalesReason,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - }, - { - "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", - "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a6bea84fba7b05fb5d12630c8e6306ac", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:9f37bb7baa7ded19cc023e9f644a8cf8", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - 
"json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:3f157d8292fb473142f19e2250af537f", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json index 02c357259c3f53..e1af3f72a8af1f 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json @@ -112,11 +112,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "0565425f-2083-45d3-bb61-76e0ee5e1117", + "job_id": "c6fb6778-14f1-4516-bb41-e5eaa97a687b", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-01-19 11:45:06.667000", - "date_modified": "2024-01-19 11:45:06.840000", + "date_created": "2024-07-27 23:58:29.780000", + "date_modified": "2024-07-27 23:58:29.943000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -1304,7 +1304,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1316,7 +1316,7 @@ "com.linkedin.pegasus2avro.schema.StringType": 
{} } }, - "nativeDataType": "NVARCHAR()", + "nativeDataType": "NVARCHAR(max) COLLATE SQL_Latin1_General_CP1_CI_AS", "recursive": false, "isPartOfKey": false } @@ -1546,7 +1546,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1558,7 +1558,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "NVARCHAR()", + "nativeDataType": "NVARCHAR(max) COLLATE SQL_Latin1_General_CP1_CI_AS", "recursive": false, "isPartOfKey": false } @@ -1678,7 +1678,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -1691,7 +1691,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=255, collation='SQL_Latin1_General_CP1_CI_AS')", + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", "recursive": false, "isPartOfKey": false }, @@ -1703,7 +1703,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=255, collation='SQL_Latin1_General_CP1_CI_AS')", + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", "recursive": false, "isPartOfKey": false }, @@ -1715,7 +1715,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false } @@ -1835,7 +1835,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -1847,7 +1847,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "UNIQUEIDENTIFIER()", + "nativeDataType": "UNIQUEIDENTIFIER", "recursive": false, "isPartOfKey": false }, @@ -1859,7 +1859,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": 
"NVARCHAR(length=50)", + "nativeDataType": "NVARCHAR(50) COLLATE SQL_Latin1_General_CP1_CI_AS", "recursive": false, "isPartOfKey": false } @@ -1961,8 +1961,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-01-19 11:45:06.590000", - "date_modified": "2024-01-19 11:45:06.590000" + "date_created": "2024-07-27 23:58:29.703000", + "date_modified": "2024-07-27 23:58:29.703000" }, "externalUrl": "", "name": "demodata.Foo.Proc.With.SpecialChar", @@ -2385,415 +2385,5 @@ "runId": "mssql-test", "lastRunId": "no-run-id-provided" } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f1b4c0e379c4b2e2e09a8ecd6c1b6dec", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bad84e08ecf49aee863df68243d8b9d0", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": 
"urn:li:container:e48d82445eeacfbe13b431f0bb1826ee", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:884bfecd9e414990a494681293413e8e", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:142ca5fc51b7f44e5e6a424bf1043590", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:1b9d125d390447de36719bfb8dd1f782", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:fcd4c8da3739150766f91e7f6c2a3a30", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - 
"path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:2029cab615b3cd82cb87b153957d2e92", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:556e25ccec98892284f017f870ef7809", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d41a036a2e6cfa44b834edf7683199ec", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.dbo.Products,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": 
"urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - }, - { - "id": "urn:li:container:d41a036a2e6cfa44b834edf7683199ec", - "urn": "urn:li:container:d41a036a2e6cfa44b834edf7683199ec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Items,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - }, - { - "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", - "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Persons,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - }, - { - "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", - "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": 
"no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.SalesReason,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - }, - { - "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", - "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a6bea84fba7b05fb5d12630c8e6306ac", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:9f37bb7baa7ded19cc023e9f644a8cf8", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:3f157d8292fb473142f19e2250af537f", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - 
"lastRunId": "no-run-id-provided" - } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json index ad15c654e44c96..5b936c0d9f2446 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json @@ -112,11 +112,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "0565425f-2083-45d3-bb61-76e0ee5e1117", + "job_id": "c6fb6778-14f1-4516-bb41-e5eaa97a687b", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-01-19 11:45:06.667000", - "date_modified": "2024-01-19 11:45:06.840000", + "date_created": "2024-07-27 23:58:29.780000", + "date_modified": "2024-07-27 23:58:29.943000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -1304,7 +1304,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1316,7 +1316,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "NVARCHAR()", + "nativeDataType": "NVARCHAR(max) COLLATE SQL_Latin1_General_CP1_CI_AS", "recursive": false, "isPartOfKey": false } @@ -1546,7 +1546,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1558,7 +1558,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "NVARCHAR()", + "nativeDataType": "NVARCHAR(max) COLLATE SQL_Latin1_General_CP1_CI_AS", "recursive": false, "isPartOfKey": false } @@ -1678,7 +1678,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - 
"nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -1691,7 +1691,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=255, collation='SQL_Latin1_General_CP1_CI_AS')", + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", "recursive": false, "isPartOfKey": false }, @@ -1703,7 +1703,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=255, collation='SQL_Latin1_General_CP1_CI_AS')", + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", "recursive": false, "isPartOfKey": false }, @@ -1715,7 +1715,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false } @@ -1835,7 +1835,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -1847,7 +1847,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "UNIQUEIDENTIFIER()", + "nativeDataType": "UNIQUEIDENTIFIER", "recursive": false, "isPartOfKey": false }, @@ -1859,7 +1859,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "NVARCHAR(length=50)", + "nativeDataType": "NVARCHAR(50) COLLATE SQL_Latin1_General_CP1_CI_AS", "recursive": false, "isPartOfKey": false } @@ -1961,8 +1961,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-01-19 11:45:06.590000", - "date_modified": "2024-01-19 11:45:06.590000" + "date_created": "2024-07-27 23:58:29.703000", + "date_modified": "2024-07-27 23:58:29.703000" }, "externalUrl": "", "name": "demodata.Foo.Proc.With.SpecialChar", @@ -2385,415 +2385,5 @@ "runId": "mssql-test", "lastRunId": "no-run-id-provided" } 
-}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f1b4c0e379c4b2e2e09a8ecd6c1b6dec", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bad84e08ecf49aee863df68243d8b9d0", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:e48d82445eeacfbe13b431f0bb1826ee", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:884bfecd9e414990a494681293413e8e", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": 
"urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:142ca5fc51b7f44e5e6a424bf1043590", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:1b9d125d390447de36719bfb8dd1f782", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:fcd4c8da3739150766f91e7f6c2a3a30", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:2029cab615b3cd82cb87b153957d2e92", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": 
"mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:556e25ccec98892284f017f870ef7809", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d41a036a2e6cfa44b834edf7683199ec", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.dbo.products,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - }, - { - "id": "urn:li:container:d41a036a2e6cfa44b834edf7683199ec", - "urn": "urn:li:container:d41a036a2e6cfa44b834edf7683199ec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - 
"runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - }, - { - "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", - "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - }, - { - "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", - "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - }, - { - "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", - "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": 
"urn:li:container:a6bea84fba7b05fb5d12630c8e6306ac", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:9f37bb7baa7ded19cc023e9f644a8cf8", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:3f157d8292fb473142f19e2250af537f", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/trino/trino_hive_instance_mces_golden.json b/metadata-ingestion/tests/integration/trino/trino_hive_instance_mces_golden.json index c5664b9373e8c5..d19b83f4d4e2f8 100644 --- a/metadata-ingestion/tests/integration/trino/trino_hive_instance_mces_golden.json +++ b/metadata-ingestion/tests/integration/trino/trino_hive_instance_mces_golden.json @@ -244,7 +244,7 @@ "numrows": "1", "rawdatasize": "32", "totalsize": "33", - "transient_lastddltime": "1710150034" + "transient_lastddltime": "1722106707" }, "name": "array_struct_test", "description": "This table has array of 
structs", @@ -280,7 +280,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -505,7 +505,7 @@ "numrows": "3", "rawdatasize": "94", "totalsize": "97", - "transient_lastddltime": "1710150038" + "transient_lastddltime": "1722106711" }, "name": "classification_test", "tags": [] @@ -539,7 +539,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR()", + "nativeDataType": "VARCHAR", "recursive": false, "isPartOfKey": false }, @@ -551,7 +551,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR()", + "nativeDataType": "VARCHAR", "recursive": false, "isPartOfKey": false }, @@ -563,7 +563,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR()", + "nativeDataType": "VARCHAR", "recursive": false, "isPartOfKey": false }, @@ -575,7 +575,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR()", + "nativeDataType": "VARCHAR", "recursive": false, "isPartOfKey": false }, @@ -587,7 +587,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false } @@ -764,7 +764,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1710150036" + "transient_lastddltime": "1722106709" }, "name": "map_test", "tags": [] @@ -798,7 +798,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR()", + "nativeDataType": "VARCHAR", "recursive": false, "isPartOfKey": false }, @@ -991,7 +991,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1710150036" + "transient_lastddltime": "1722106709" }, "name": "nested_struct_test", "tags": [] @@ -1025,7 +1025,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + 
"nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1262,7 +1262,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastddltime": "1710150028" + "transient_lastddltime": "1722106702" }, "name": "pokes", "tags": [] @@ -1296,7 +1296,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1308,7 +1308,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR()", + "nativeDataType": "VARCHAR", "recursive": false, "isPartOfKey": false }, @@ -1320,7 +1320,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR()", + "nativeDataType": "VARCHAR", "recursive": false, "isPartOfKey": false } @@ -1497,7 +1497,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1710150031" + "transient_lastddltime": "1722106704" }, "name": "struct_test", "tags": [] @@ -1531,7 +1531,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1748,7 +1748,7 @@ "customProperties": { "numfiles": "0", "totalsize": "0", - "transient_lastddltime": "1710150036" + "transient_lastddltime": "1722106709" }, "name": "struct_test_view_materialized", "tags": [] @@ -1782,7 +1782,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -2002,7 +2002,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1710150031" + "transient_lastddltime": "1722106704" }, "name": "_test_table_underscore", "tags": [] @@ -2036,7 +2036,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": 
false }, @@ -2048,7 +2048,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR()", + "nativeDataType": "VARCHAR", "recursive": false, "isPartOfKey": false } @@ -2225,7 +2225,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1710150036" + "transient_lastddltime": "1722106709" }, "name": "union_test", "tags": [] @@ -2527,7 +2527,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastddltime": "1710150036", + "transient_lastddltime": "1722106709", "view_definition": "SELECT \"property_id\", \"service\"\nFROM \"db1\".\"array_struct_test\"", "is_view": "True" }, @@ -2563,7 +2563,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, diff --git a/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json b/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json index 2764433808cbdb..f72610fba7c547 100644 --- a/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json +++ b/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json @@ -231,7 +231,7 @@ "numrows": "1", "rawdatasize": "32", "totalsize": "33", - "transient_lastddltime": "1713211020" + "transient_lastddltime": "1722106707" }, "name": "array_struct_test", "description": "This table has array of structs", @@ -267,7 +267,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -471,7 +471,7 @@ "numrows": "3", "rawdatasize": "94", "totalsize": "97", - "transient_lastddltime": "1713211025" + "transient_lastddltime": "1722106711" }, "name": "classification_test", "tags": [] @@ -505,7 +505,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR()", + "nativeDataType": "VARCHAR", "recursive": 
false, "isPartOfKey": false }, @@ -517,7 +517,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR()", + "nativeDataType": "VARCHAR", "recursive": false, "glossaryTerms": { "terms": [ @@ -540,7 +540,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR()", + "nativeDataType": "VARCHAR", "recursive": false, "glossaryTerms": { "terms": [ @@ -563,7 +563,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR()", + "nativeDataType": "VARCHAR", "recursive": false, "glossaryTerms": { "terms": [ @@ -586,7 +586,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "glossaryTerms": { "terms": [ @@ -753,7 +753,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1713211023" + "transient_lastddltime": "1722106709" }, "name": "map_test", "tags": [] @@ -787,7 +787,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR()", + "nativeDataType": "VARCHAR", "recursive": false, "isPartOfKey": false }, @@ -959,7 +959,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1713211023" + "transient_lastddltime": "1722106709" }, "name": "nested_struct_test", "tags": [] @@ -993,7 +993,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1209,7 +1209,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastddltime": "1713211015" + "transient_lastddltime": "1722106702" }, "name": "pokes", "tags": [] @@ -1243,7 +1243,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1255,7 +1255,7 @@ 
"com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR()", + "nativeDataType": "VARCHAR", "recursive": false, "isPartOfKey": false }, @@ -1267,7 +1267,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR()", + "nativeDataType": "VARCHAR", "recursive": false, "isPartOfKey": false } @@ -1423,7 +1423,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1713211017" + "transient_lastddltime": "1722106704" }, "name": "struct_test", "tags": [] @@ -1457,7 +1457,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1653,7 +1653,7 @@ "customProperties": { "numfiles": "0", "totalsize": "0", - "transient_lastddltime": "1713211023" + "transient_lastddltime": "1722106709" }, "name": "struct_test_view_materialized", "tags": [] @@ -1687,7 +1687,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1886,7 +1886,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1713211017" + "transient_lastddltime": "1722106704" }, "name": "_test_table_underscore", "tags": [] @@ -1920,7 +1920,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1932,7 +1932,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR()", + "nativeDataType": "VARCHAR", "recursive": false, "isPartOfKey": false } @@ -2088,7 +2088,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1713211023" + "transient_lastddltime": "1722106709" }, "name": "union_test", "tags": [] @@ -2369,7 +2369,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - 
"transient_lastddltime": "1713211023", + "transient_lastddltime": "1722106709", "view_definition": "SELECT \"property_id\", \"service\"\nFROM \"db1\".\"array_struct_test\"", "is_view": "True" }, @@ -2405,7 +2405,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, diff --git a/metadata-ingestion/tests/integration/trino/trino_mces_golden.json b/metadata-ingestion/tests/integration/trino/trino_mces_golden.json index 1f03f02fa9408f..b2afad81b12fad 100644 --- a/metadata-ingestion/tests/integration/trino/trino_mces_golden.json +++ b/metadata-ingestion/tests/integration/trino/trino_mces_golden.json @@ -256,7 +256,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -268,7 +268,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -280,7 +280,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -292,7 +292,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -304,7 +304,7 @@ "com.linkedin.pegasus2avro.schema.RecordType": {} } }, - "nativeDataType": "JSON()", + "nativeDataType": "JSON", "recursive": false, "isPartOfKey": false }, @@ -504,7 +504,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -516,7 +516,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": 
false, "isPartOfKey": false }, @@ -528,7 +528,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -540,7 +540,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false } @@ -723,7 +723,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -735,7 +735,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false } @@ -918,7 +918,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -930,7 +930,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -942,7 +942,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -954,7 +954,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -966,7 +966,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -978,7 +978,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false } @@ -1479,142 +1479,5 @@ "runId": "trino-test", "lastRunId": "no-run-id-provided" } -}, -{ - "entityType": 
"container", - "entityUrn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [] - } - }, - "systemMetadata": { - "lastObserved": 1632398400000, - "runId": "trino-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", - "urn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1632398400000, - "runId": "trino-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", - "urn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761" - }, - { - "id": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", - "urn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1632398400000, - "runId": "trino-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.issue_history,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", - "urn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761" - }, - { - "id": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", - "urn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1632398400000, - "runId": "trino-test", - "lastRunId": 
"no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.member,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", - "urn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761" - }, - { - "id": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", - "urn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1632398400000, - "runId": "trino-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book_in_circulation,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", - "urn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761" - }, - { - "id": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", - "urn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1632398400000, - "runId": "trino-test", - "lastRunId": "no-run-id-provided" - } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/vertica/vertica_mces_with_db_golden.json b/metadata-ingestion/tests/integration/vertica/vertica_mces_with_db_golden.json index cd1cd0d7e28a48..b7e0f268aa7fe1 100644 --- a/metadata-ingestion/tests/integration/vertica/vertica_mces_with_db_golden.json +++ b/metadata-ingestion/tests/integration/vertica/vertica_mces_with_db_golden.json @@ -250,7 +250,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2024-06-03 12:24:31.057395+00:00", + "create_time": "2024-06-18 12:46:11.762979+00:00", "table_size": "0 KB" }, "name": "clicks", @@ -287,7 +287,7 @@ 
"com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -300,7 +300,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -313,7 +313,7 @@ "com.linkedin.pegasus2avro.schema.TimeType": {} } }, - "nativeDataType": "TIMESTAMP_WITH_PRECISION()", + "nativeDataType": "TIMESTAMP", "recursive": false, "isPartOfKey": false } @@ -427,7 +427,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2024-06-03 12:23:45.887434+00:00", + "create_time": "2024-06-18 12:45:49.748139+00:00", "table_size": "2119 KB" }, "name": "customer_dimension", @@ -464,7 +464,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -477,7 +477,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=16)", + "nativeDataType": "VARCHAR(16)", "recursive": false, "isPartOfKey": false }, @@ -490,7 +490,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=256)", + "nativeDataType": "VARCHAR(256)", "recursive": false, "isPartOfKey": false }, @@ -503,7 +503,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=8)", + "nativeDataType": "VARCHAR(8)", "recursive": false, "isPartOfKey": false }, @@ -516,7 +516,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=8)", + "nativeDataType": "VARCHAR(8)", "recursive": false, "isPartOfKey": false }, @@ -529,7 +529,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -542,7 +542,7 @@ 
"com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=256)", + "nativeDataType": "VARCHAR(256)", "recursive": false, "isPartOfKey": false }, @@ -555,7 +555,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, "isPartOfKey": false }, @@ -568,7 +568,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=2)", + "nativeDataType": "CHAR(2)", "recursive": false, "isPartOfKey": false }, @@ -581,7 +581,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, "isPartOfKey": false }, @@ -594,7 +594,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -607,7 +607,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -620,7 +620,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -633,7 +633,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -646,7 +646,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, "isPartOfKey": false }, @@ -659,7 +659,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -672,7 +672,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + 
"nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -685,7 +685,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -698,7 +698,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -711,7 +711,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -724,7 +724,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false } @@ -838,7 +838,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2024-06-03 12:23:45.897002+00:00", + "create_time": "2024-06-18 12:45:49.756539+00:00", "table_size": "145 KB" }, "name": "date_dimension", @@ -875,7 +875,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -888,7 +888,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -901,7 +901,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=18)", + "nativeDataType": "VARCHAR(18)", "recursive": false, "isPartOfKey": false }, @@ -914,7 +914,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=9)", + "nativeDataType": "VARCHAR(9)", "recursive": false, "isPartOfKey": false }, @@ -927,7 +927,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -940,7 
+940,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -953,7 +953,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -966,7 +966,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -979,7 +979,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -992,7 +992,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1005,7 +1005,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1018,7 +1018,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=9)", + "nativeDataType": "VARCHAR(9)", "recursive": false, "isPartOfKey": false }, @@ -1031,7 +1031,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1044,7 +1044,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=7)", + "nativeDataType": "CHAR(7)", "recursive": false, "isPartOfKey": false }, @@ -1057,7 +1057,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1070,7 +1070,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=7)", + "nativeDataType": "CHAR(7)", 
"recursive": false, "isPartOfKey": false }, @@ -1083,7 +1083,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1096,7 +1096,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1109,7 +1109,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=10)", + "nativeDataType": "VARCHAR(10)", "recursive": false, "isPartOfKey": false }, @@ -1122,7 +1122,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=7)", + "nativeDataType": "CHAR(7)", "recursive": false, "isPartOfKey": false }, @@ -1135,7 +1135,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false } @@ -1249,7 +1249,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2024-06-03 12:23:45.903227+00:00", + "create_time": "2024-06-18 12:45:49.761468+00:00", "table_size": "327 KB" }, "name": "employee_dimension", @@ -1286,7 +1286,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -1299,7 +1299,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=8)", + "nativeDataType": "VARCHAR(8)", "recursive": false, "isPartOfKey": false }, @@ -1312,7 +1312,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=8)", + "nativeDataType": "VARCHAR(8)", "recursive": false, "isPartOfKey": false }, @@ -1325,7 +1325,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", 
"recursive": false, "isPartOfKey": false }, @@ -1338,7 +1338,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=8)", + "nativeDataType": "VARCHAR(8)", "recursive": false, "isPartOfKey": false }, @@ -1351,7 +1351,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, "isPartOfKey": false }, @@ -1364,7 +1364,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1377,7 +1377,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -1390,7 +1390,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=256)", + "nativeDataType": "VARCHAR(256)", "recursive": false, "isPartOfKey": false }, @@ -1403,7 +1403,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, "isPartOfKey": false }, @@ -1416,7 +1416,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=2)", + "nativeDataType": "CHAR(2)", "recursive": false, "isPartOfKey": false }, @@ -1429,7 +1429,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "CHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -1442,7 +1442,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, "isPartOfKey": false }, @@ -1455,7 +1455,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1468,7 +1468,7 @@ 
"com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1481,7 +1481,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1494,7 +1494,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "FLOAT()", + "nativeDataType": "FLOAT", "recursive": false, "isPartOfKey": false }, @@ -1507,7 +1507,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false } @@ -1621,7 +1621,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2024-06-03 12:23:45.912348+00:00", + "create_time": "2024-06-18 12:45:49.768272+00:00", "table_size": "2567 KB" }, "name": "inventory_fact", @@ -1658,7 +1658,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1671,7 +1671,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1684,7 +1684,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1697,7 +1697,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1710,7 +1710,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -1723,7 +1723,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": 
"DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false } @@ -1837,7 +1837,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2024-06-03 12:24:31.194163+00:00", + "create_time": "2024-06-18 12:46:11.881911+00:00", "table_size": "0 KB" }, "name": "phrases", @@ -1874,7 +1874,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=128)", + "nativeDataType": "VARCHAR(128)", "recursive": false, "isPartOfKey": false } @@ -1988,7 +1988,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2024-06-03 12:23:45.890782+00:00", + "create_time": "2024-06-18 12:45:49.751522+00:00", "table_size": "19 KB" }, "name": "product_dimension", @@ -2025,7 +2025,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -2038,7 +2038,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -2051,7 +2051,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=128)", + "nativeDataType": "VARCHAR(128)", "recursive": false, "isPartOfKey": false }, @@ -2064,7 +2064,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "CHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -2077,7 +2077,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "CHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -2090,7 +2090,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "CHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -2103,7 +2103,7 @@ 
"com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "CHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -2116,7 +2116,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "CHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -2129,7 +2129,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -2142,7 +2142,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "CHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -2155,7 +2155,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -2168,7 +2168,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "CHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -2181,7 +2181,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -2194,7 +2194,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -2207,7 +2207,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -2220,7 +2220,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -2233,7 +2233,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", 
"recursive": false, "isPartOfKey": false }, @@ -2246,7 +2246,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -2259,7 +2259,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -2272,7 +2272,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -2285,7 +2285,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false } @@ -2399,7 +2399,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2024-06-03 12:23:45.893891+00:00", + "create_time": "2024-06-18 12:45:49.754039+00:00", "table_size": "3 KB" }, "name": "promotion_dimension", @@ -2436,7 +2436,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -2449,7 +2449,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=128)", + "nativeDataType": "VARCHAR(128)", "recursive": false, "isPartOfKey": false }, @@ -2462,7 +2462,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -2475,7 +2475,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -2488,7 +2488,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": 
false, "isPartOfKey": false }, @@ -2501,7 +2501,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -2514,7 +2514,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -2527,7 +2527,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -2540,7 +2540,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=128)", + "nativeDataType": "VARCHAR(128)", "recursive": false, "isPartOfKey": false }, @@ -2553,7 +2553,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -2566,7 +2566,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -2579,7 +2579,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false } @@ -2693,7 +2693,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2024-06-03 12:24:31.046829+00:00", + "create_time": "2024-06-18 12:46:11.751917+00:00", "table_size": "0 KB" }, "name": "readings", @@ -2730,7 +2730,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -2743,7 +2743,7 @@ "com.linkedin.pegasus2avro.schema.TimeType": {} } }, - "nativeDataType": "TIMESTAMP_WITH_PRECISION()", + "nativeDataType": "TIMESTAMP", "recursive": false, "isPartOfKey": false 
}, @@ -2756,7 +2756,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "FLOAT()", + "nativeDataType": "FLOAT", "recursive": false, "isPartOfKey": false } @@ -2870,7 +2870,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2024-06-03 12:23:45.906471+00:00", + "create_time": "2024-06-18 12:45:49.763788+00:00", "table_size": "1 KB" }, "name": "shipping_dimension", @@ -2907,7 +2907,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -2920,7 +2920,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=30)", + "nativeDataType": "CHAR(30)", "recursive": false, "isPartOfKey": false }, @@ -2933,7 +2933,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=10)", + "nativeDataType": "CHAR(10)", "recursive": false, "isPartOfKey": false }, @@ -2946,7 +2946,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=20)", + "nativeDataType": "CHAR(20)", "recursive": false, "isPartOfKey": false } @@ -3060,7 +3060,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2024-06-03 12:23:45.900185+00:00", + "create_time": "2024-06-18 12:45:49.759103+00:00", "table_size": "1 KB" }, "name": "vendor_dimension", @@ -3097,7 +3097,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -3110,7 +3110,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, "isPartOfKey": false }, @@ -3123,7 +3123,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": 
"VARCHAR(64)", "recursive": false, "isPartOfKey": false }, @@ -3136,7 +3136,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, "isPartOfKey": false }, @@ -3149,7 +3149,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=2)", + "nativeDataType": "CHAR(2)", "recursive": false, "isPartOfKey": false }, @@ -3162,7 +3162,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -3175,7 +3175,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -3188,7 +3188,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false } @@ -3302,7 +3302,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2024-06-03 12:24:30.233405+00:00", + "create_time": "2024-06-18 12:46:11.212820+00:00", "table_size": "0 KB" }, "name": "vmart_load_success", @@ -3339,7 +3339,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false } @@ -3453,7 +3453,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2024-06-03 12:23:45.909432+00:00", + "create_time": "2024-06-18 12:45:49.766036+00:00", "table_size": "2 KB" }, "name": "warehouse_dimension", @@ -3490,7 +3490,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -3503,7 +3503,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": 
"VARCHAR(length=20)", + "nativeDataType": "VARCHAR(20)", "recursive": false, "isPartOfKey": false }, @@ -3516,7 +3516,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=256)", + "nativeDataType": "VARCHAR(256)", "recursive": false, "isPartOfKey": false }, @@ -3529,7 +3529,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=60)", + "nativeDataType": "VARCHAR(60)", "recursive": false, "isPartOfKey": false }, @@ -3542,7 +3542,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=2)", + "nativeDataType": "CHAR(2)", "recursive": false, "isPartOfKey": false }, @@ -3555,7 +3555,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false } @@ -3669,7 +3669,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2024-06-03 12:24:31.075640+00:00", + "create_time": "2024-06-18 12:46:11.778456+00:00", "table_size": "0 KB", "view_definition": "SELECT sum(customer_dimension.annual_income) AS SUM, customer_dimension.customer_state FROM public.customer_dimension WHERE (customer_dimension.customer_key IN (SELECT store_sales_fact.customer_key FROM store.store_sales_fact)) GROUP BY customer_dimension.customer_state ORDER BY customer_dimension.customer_state", "is_view": "True" @@ -3708,7 +3708,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -3721,7 +3721,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=2)", + "nativeDataType": "CHAR(2)", "recursive": false, "isPartOfKey": false } @@ -3962,7 +3962,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2024-06-03 
12:23:45.918904+00:00", + "create_time": "2024-06-18 12:45:49.773986+00:00", "table_size": "2 KB" }, "name": "store_dimension", @@ -3999,7 +3999,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -4012,7 +4012,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, "isPartOfKey": false }, @@ -4025,7 +4025,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4038,7 +4038,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=256)", + "nativeDataType": "VARCHAR(256)", "recursive": false, "isPartOfKey": false }, @@ -4051,7 +4051,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, "isPartOfKey": false }, @@ -4064,7 +4064,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=2)", + "nativeDataType": "CHAR(2)", "recursive": false, "isPartOfKey": false }, @@ -4077,7 +4077,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, "isPartOfKey": false }, @@ -4090,7 +4090,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -4103,7 +4103,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -4116,7 +4116,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + 
"nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -4129,7 +4129,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4142,7 +4142,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4155,7 +4155,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -4168,7 +4168,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -4181,7 +4181,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4194,7 +4194,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4207,7 +4207,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4220,7 +4220,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false } @@ -4334,7 +4334,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2024-06-03 12:23:45.929154+00:00", + "create_time": "2024-06-18 12:45:49.781744+00:00", "table_size": "8646 KB" }, "name": "store_orders_fact", @@ -4371,7 +4371,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ 
-4384,7 +4384,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4397,7 +4397,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4410,7 +4410,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4423,7 +4423,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4436,7 +4436,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4449,7 +4449,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -4462,7 +4462,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -4475,7 +4475,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -4488,7 +4488,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -4501,7 +4501,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4514,7 +4514,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, 
@@ -4527,7 +4527,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -4540,7 +4540,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4553,7 +4553,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4566,7 +4566,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4579,7 +4579,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4592,7 +4592,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4605,7 +4605,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false } @@ -4719,7 +4719,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2024-06-03 12:23:45.922050+00:00", + "create_time": "2024-06-18 12:45:49.776427+00:00", "table_size": "225096 KB" }, "name": "store_sales_fact", @@ -4756,7 +4756,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4769,7 +4769,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4782,7 +4782,7 @@ 
"com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4795,7 +4795,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4808,7 +4808,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4821,7 +4821,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4834,7 +4834,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4847,7 +4847,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4860,7 +4860,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4873,7 +4873,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4886,7 +4886,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4899,7 +4899,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -4912,7 +4912,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=16)", + "nativeDataType": "VARCHAR(16)", "recursive": 
false, "isPartOfKey": false }, @@ -4925,7 +4925,7 @@ "com.linkedin.pegasus2avro.schema.TimeType": {} } }, - "nativeDataType": "TIME()", + "nativeDataType": "TIME", "recursive": false, "isPartOfKey": false }, @@ -4938,7 +4938,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=8)", + "nativeDataType": "VARCHAR(8)", "recursive": false, "isPartOfKey": false }, @@ -4951,7 +4951,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -4964,7 +4964,7 @@ "com.linkedin.pegasus2avro.schema.TimeType": {} } }, - "nativeDataType": "TIMESTAMP_WITH_PRECISION()", + "nativeDataType": "TIMESTAMP", "recursive": false, "isPartOfKey": false } @@ -5187,7 +5187,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2024-06-03 12:23:45.938730+00:00", + "create_time": "2024-06-18 12:45:49.789350+00:00", "table_size": "6 KB" }, "name": "call_center_dimension", @@ -5224,7 +5224,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -5237,7 +5237,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -5250,7 +5250,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -5263,7 +5263,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -5276,7 +5276,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -5289,7 
+5289,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -5302,7 +5302,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=20)", + "nativeDataType": "CHAR(20)", "recursive": false, "isPartOfKey": false }, @@ -5315,7 +5315,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=40)", + "nativeDataType": "VARCHAR(40)", "recursive": false, "isPartOfKey": false }, @@ -5328,7 +5328,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=256)", + "nativeDataType": "VARCHAR(256)", "recursive": false, "isPartOfKey": false }, @@ -5341,7 +5341,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, "isPartOfKey": false }, @@ -5354,7 +5354,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=2)", + "nativeDataType": "CHAR(2)", "recursive": false, "isPartOfKey": false }, @@ -5367,7 +5367,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, "isPartOfKey": false } @@ -5481,7 +5481,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2024-06-03 12:23:45.935745+00:00", + "create_time": "2024-06-18 12:45:49.786989+00:00", "table_size": "9 KB" }, "name": "online_page_dimension", @@ -5518,7 +5518,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": true }, @@ -5531,7 +5531,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -5544,7 +5544,7 @@ 
"com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -5557,7 +5557,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -5570,7 +5570,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=100)", + "nativeDataType": "VARCHAR(100)", "recursive": false, "isPartOfKey": false }, @@ -5583,7 +5583,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=100)", + "nativeDataType": "VARCHAR(100)", "recursive": false, "isPartOfKey": false } @@ -5697,7 +5697,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2024-06-03 12:23:45.941712+00:00", + "create_time": "2024-06-18 12:45:49.791761+00:00", "table_size": "182385 KB" }, "name": "online_sales_fact", @@ -5734,7 +5734,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -5747,7 +5747,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -5760,7 +5760,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -5773,7 +5773,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -5786,7 +5786,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -5799,7 +5799,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": 
{} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -5812,7 +5812,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -5825,7 +5825,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -5838,7 +5838,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -5851,7 +5851,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -5864,7 +5864,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -5877,7 +5877,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -5890,7 +5890,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "FLOAT()", + "nativeDataType": "FLOAT", "recursive": false, "isPartOfKey": false }, @@ -5903,7 +5903,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "FLOAT()", + "nativeDataType": "FLOAT", "recursive": false, "isPartOfKey": false }, @@ -5916,7 +5916,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "FLOAT()", + "nativeDataType": "FLOAT", "recursive": false, "isPartOfKey": false }, @@ -5929,7 +5929,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "FLOAT()", + "nativeDataType": "FLOAT", "recursive": false, "isPartOfKey": false }, @@ -5942,7 +5942,7 @@ 
"com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "FLOAT()", + "nativeDataType": "FLOAT", "recursive": false, "isPartOfKey": false }, @@ -5955,7 +5955,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=16)", + "nativeDataType": "VARCHAR(16)", "recursive": false, "isPartOfKey": false }, @@ -5968,7 +5968,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -5981,7 +5981,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false } @@ -6052,7 +6052,7 @@ "env": "PROD", "database": "vmart", "cluster_type": "Enterprise", - "cluster_size": "52 GB", + "cluster_size": "243 GB", "subcluster": " ", "communal_storage_path": "" }, @@ -6129,7 +6129,7 @@ "schema": "public", "projection_count": "12", "udx_list": "APPROXIMATE_COUNT_DISTINCT_SYNOPSIS_INFO, APPROXIMATE_MEDIAN, APPROXIMATE_PERCENTILE, AcdDataToCount, AcdDataToLongSyn, AcdDataToSyn, AcdSynToCount, AcdSynToSyn, DelimitedExport, DelimitedExportMulti, EmptyMap, Explode, FAvroParser, FCefParser, FCsvParser, FDelimitedPairParser, FDelimitedParser, FIDXParser, FJSONParser, FRegexParser, FlexTokenizer, JsonExport, JsonExportMulti, KafkaAvroParser, KafkaCheckBrokers, KafkaExport, KafkaInsertDelimiters, KafkaInsertLengths, KafkaJsonParser, KafkaListManyTopics, KafkaListTopics, KafkaOffsets, KafkaParser, KafkaSource, KafkaTopicDetails, MSE, MapAggregate, MapAggregate, MapContainsKey, MapContainsKey, MapContainsValue, MapContainsValue, MapDelimitedExtractor, MapItems, MapItems, MapJSONExtractor, MapKeys, MapKeys, MapKeysInfo, MapKeysInfo, MapLookup, MapLookup, MapLookup, MapPut, MapRegexExtractor, MapSize, MapSize, MapToString, MapToString, MapValues, MapValues, MapValuesOrField, MapVersion, MapVersion, OrcExport, OrcExportMulti, PRC, ParquetExport, 
ParquetExportMulti, PickBestType, PickBestType, PickBestType, ROC, STV_AsGeoJSON, STV_AsGeoJSON, STV_AsGeoJSON, STV_Create_Index, STV_Create_Index, STV_Create_Index, STV_DWithin, STV_DWithin, STV_DWithin, STV_Describe_Index, STV_Drop_Index, STV_Export2Shapefile, STV_Extent, STV_Extent, STV_ForceLHR, STV_Geography, STV_Geography, STV_GeographyPoint, STV_Geometry, STV_Geometry, STV_GeometryPoint, STV_GeometryPoint, STV_GetExportShapefileDirectory, STV_Intersect, STV_Intersect, STV_Intersect, STV_Intersect, STV_Intersect, STV_Intersect, STV_Intersect, STV_Intersect, STV_IsValidReason, STV_IsValidReason, STV_IsValidReason, STV_LineStringPoint, STV_LineStringPoint, STV_LineStringPoint, STV_MemSize, STV_MemSize, STV_MemSize, STV_NN, STV_NN, STV_NN, STV_PolygonPoint, STV_PolygonPoint, STV_PolygonPoint, STV_Refresh_Index, STV_Refresh_Index, STV_Refresh_Index, STV_Rename_Index, STV_Reverse, STV_SetExportShapefileDirectory, STV_ShpCreateTable, STV_ShpParser, STV_ShpSource, ST_Area, ST_Area, ST_Area, ST_AsBinary, ST_AsBinary, ST_AsBinary, ST_AsText, ST_AsText, ST_AsText, ST_Boundary, ST_Buffer, ST_Centroid, ST_Contains, ST_Contains, ST_Contains, ST_ConvexHull, ST_Crosses, ST_Difference, ST_Disjoint, ST_Disjoint, ST_Disjoint, ST_Distance, ST_Distance, ST_Distance, ST_Envelope, ST_Equals, ST_Equals, ST_Equals, ST_GeoHash, ST_GeoHash, ST_GeoHash, ST_GeographyFromText, ST_GeographyFromWKB, ST_GeomFromGeoHash, ST_GeomFromGeoJSON, ST_GeomFromGeoJSON, ST_GeomFromText, ST_GeomFromText, ST_GeomFromWKB, ST_GeomFromWKB, ST_GeometryN, ST_GeometryN, ST_GeometryN, ST_GeometryType, ST_GeometryType, ST_GeometryType, ST_Intersection, ST_Intersects, ST_Intersects, ST_IsEmpty, ST_IsEmpty, ST_IsEmpty, ST_IsSimple, ST_IsSimple, ST_IsSimple, ST_IsValid, ST_IsValid, ST_IsValid, ST_Length, ST_Length, ST_Length, ST_NumGeometries, ST_NumGeometries, ST_NumGeometries, ST_NumPoints, ST_NumPoints, ST_NumPoints, ST_Overlaps, ST_PointFromGeoHash, ST_PointN, ST_PointN, ST_PointN, ST_Relate, ST_SRID, ST_SRID, 
ST_SRID, ST_Simplify, ST_SimplifyPreserveTopology, ST_SymDifference, ST_Touches, ST_Touches, ST_Touches, ST_Transform, ST_Union, ST_Union, ST_Within, ST_Within, ST_Within, ST_X, ST_X, ST_X, ST_XMax, ST_XMax, ST_XMax, ST_XMin, ST_XMin, ST_XMin, ST_Y, ST_Y, ST_Y, ST_YMax, ST_YMax, ST_YMax, ST_YMin, ST_YMin, ST_YMin, ST_intersects, SetMapKeys, Summarize_CatCol, Summarize_CatCol, Summarize_CatCol, Summarize_CatCol, Summarize_CatCol, Summarize_NumCol, Unnest, VoltageSecureAccess, VoltageSecureAccess, VoltageSecureConfigure, VoltageSecureConfigureGlobal, VoltageSecureProtect, VoltageSecureProtect, VoltageSecureProtectAllKeys, VoltageSecureRefreshPolicy, VoltageSecureVersion, append_centers, apply_bisecting_kmeans, apply_iforest, apply_inverse_pca, apply_inverse_svd, apply_kmeans, apply_kprototypes, apply_normalize, apply_one_hot_encoder, apply_pca, apply_svd, approximate_quantiles, ar_create_blobs, ar_final_newton, ar_save_model, ar_transition_newton, arima_bfgs, arima_line_search, arima_save_model, avg_all_columns_local, bisecting_kmeans_init_model, bk_apply_best_kmeans_results, bk_compute_totss_local, bk_finalize_model, bk_get_rows_in_active_cluster, bk_kmeans_compute_local_centers, bk_kmeans_compute_withinss, bk_kmeans_fast_random_init, bk_kmeans_slow_random_init, bk_kmeanspp_init_cur_cluster, bk_kmeanspp_reset_blob, bk_kmeanspp_select_new_centers, bk_kmeanspp_within_chunk_sum, bk_save_final_model, bk_write_new_cluster_level, blob_to_table, bufUdx, bufUdx, calc_pseudo_centers, calculate_alpha_linear, calculate_hessian_linear1, calculate_hessian_linear2, chi_squared, cleanup_kmeans_files, compute_and_save_global_center, compute_and_save_new_centers, compute_local_totss, compute_local_withinss, compute_new_local_centers, confusion_matrix, coordinate_descent_covariance, corr_matrix, count_rows_in_blob, create_aggregator_blob, error_rate, evaluate_naive_bayes_model, evaluate_reg_model, evaluate_svm_model, export_model_files, finalize_blob_resource_group, get_attr_minmax, 
get_attr_robust_zscore, get_attr_zscore, get_model_attribute, get_model_summary, get_robust_zscore_median, iforest_create_blobs, iforest_phase0_udf1, iforest_phase0_udf2, iforest_phase1_udf1, iforest_phase1_udf2, iforest_phase1_udf3, iforest_phase1_udf4, iforest_phase2_udf1, iforest_phase2_udf2, iforest_phase2_udf3, iforest_phase2_udf4, iforest_save_model, import_model_files, isOrContains, kmeansAddMetricsToModel, kmeans_init_blobs, kmeans_to_write_final_centers, lift_table, line_search_logistic1, line_search_logistic2, load_rows_into_blocks, map_factor, math_op, matrix_global_xtx, matrix_local_xtx, mode_finder, model_converter, naive_bayes_phase1, naive_bayes_phase1_blob, naive_bayes_phase2, pca_prep1_global, pca_prep1_local, pca_prep2, pmml_parser, predict_arima, predict_autoregressor, predict_linear_reg, predict_logistic_reg, predict_moving_average, predict_naive_bayes, predict_naive_bayes_classes, predict_pmml, predict_poisson_reg, predict_rf_classifier, predict_rf_classifier_classes, predict_rf_regressor, predict_svm_classifier, predict_svm_regressor, predict_xgb_classifier, predict_xgb_classifier_classes, predict_xgb_regressor, random_init, random_init_write, read_from_dfblob, read_map_factor, read_ptree, read_tree, reg_final_bfgs, reg_final_newton, reg_transition_bfgs, reg_transition_newton, reg_write_model, remove_blob, reverse_normalize, rf_blob, rf_clean, rf_phase0_udf1, rf_phase0_udf2, rf_phase1_udf1, rf_phase1_udf2, rf_phase1_udf3, rf_phase1_udf4, rf_phase2_udf1, rf_phase2_udf2, rf_phase2_udf3, rf_phase2_udf4, rf_predictor_importance, rf_save_model, rsquared, save_cv_result, save_pca_model, save_svd_model, save_svm_model, select_new_centers, store_minmax_model, store_one_hot_encoder_model, store_robust_zscore_model, store_zscore_model, table_to_blob, table_to_dfblob, tokenize, topk, update_and_return_sum_of_squared_distances, upgrade_model_format, writeInitialKmeansModelToDfs, xgb_create_blobs, xgb_phase0_udf1, xgb_phase0_udf2, xgb_phase1_udf1, 
xgb_phase1_udf2, xgb_phase1_udf3, xgb_phase2_udf1, xgb_phase2_udf2, xgb_phase2_udf3, xgb_predictor_importance, xgb_prune, xgb_save_model, yule_walker, ", - "udx_language": "ComplexTypesLib -- Functions for Complex Types | DelimitedExportLib -- Delimited data export package | JsonExportLib -- Json data export package | MachineLearningLib -- Machine learning package | OrcExportLib -- Orc export package | ParquetExportLib -- Parquet export package | ApproximateLib -- Approximate package | FlexTableLib -- Flexible Tables Data Load and Query | KafkaLib -- Kafka streaming load and export | PlaceLib -- Geospatial package | VoltageSecureLib -- Voltage SecureData Connector | TransformFunctions -- User-defined Python library | " + "udx_language": "ApproximateLib -- Approximate package | FlexTableLib -- Flexible Tables Data Load and Query | OrcExportLib -- Orc export package | JsonExportLib -- Json data export package | PlaceLib -- Geospatial package | ParquetExportLib -- Parquet export package | ComplexTypesLib -- Functions for Complex Types | VoltageSecureLib -- Voltage SecureData Connector | KafkaLib -- Kafka streaming load and export | MachineLearningLib -- Machine learning package | DelimitedExportLib -- Delimited data export package | TransformFunctions -- User-defined Python library | " }, "name": "public" } @@ -6304,7 +6304,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -6317,7 +6317,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -6330,7 +6330,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=18)", + "nativeDataType": "VARCHAR(18)", "recursive": false, "isPartOfKey": false }, @@ -6343,7 +6343,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": 
"VARCHAR(length=9)", + "nativeDataType": "VARCHAR(9)", "recursive": false, "isPartOfKey": false }, @@ -6356,7 +6356,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -6369,7 +6369,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -6382,7 +6382,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -6395,7 +6395,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -6408,7 +6408,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -6421,7 +6421,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -6434,7 +6434,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -6447,7 +6447,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=9)", + "nativeDataType": "VARCHAR(9)", "recursive": false, "isPartOfKey": false }, @@ -6460,7 +6460,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -6473,7 +6473,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=7)", + "nativeDataType": "CHAR(7)", "recursive": false, "isPartOfKey": false }, @@ -6486,7 +6486,7 @@ 
"com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -6499,7 +6499,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=7)", + "nativeDataType": "CHAR(7)", "recursive": false, "isPartOfKey": false }, @@ -6512,7 +6512,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -6525,7 +6525,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -6538,7 +6538,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=10)", + "nativeDataType": "VARCHAR(10)", "recursive": false, "isPartOfKey": false }, @@ -6551,7 +6551,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=7)", + "nativeDataType": "CHAR(7)", "recursive": false, "isPartOfKey": false }, @@ -6564,7 +6564,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false } @@ -6721,7 +6721,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -6734,7 +6734,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -6747,7 +6747,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=128)", + "nativeDataType": "VARCHAR(128)", "recursive": false, "isPartOfKey": false }, @@ -6760,7 +6760,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=32)", + 
"nativeDataType": "CHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -6773,7 +6773,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "CHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -6786,7 +6786,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "CHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -6799,7 +6799,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "CHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -6812,7 +6812,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "CHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -6825,7 +6825,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -6838,7 +6838,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "CHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -6851,7 +6851,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -6864,7 +6864,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "CHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -6877,7 +6877,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -6890,7 +6890,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -6903,7 +6903,7 @@ 
"com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -6916,7 +6916,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -6929,7 +6929,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -6942,7 +6942,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -6955,7 +6955,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -6968,7 +6968,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -6981,7 +6981,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false } @@ -7138,7 +7138,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -7151,7 +7151,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=128)", + "nativeDataType": "VARCHAR(128)", "recursive": false, "isPartOfKey": false }, @@ -7164,7 +7164,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -7177,7 +7177,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": 
"VARCHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -7190,7 +7190,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -7203,7 +7203,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -7216,7 +7216,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -7229,7 +7229,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -7242,7 +7242,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=128)", + "nativeDataType": "VARCHAR(128)", "recursive": false, "isPartOfKey": false }, @@ -7255,7 +7255,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -7268,7 +7268,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -7281,7 +7281,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false } @@ -7438,7 +7438,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -7451,7 +7451,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, "isPartOfKey": false }, @@ -7464,7 +7464,7 @@ 
"com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, "isPartOfKey": false }, @@ -7477,7 +7477,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, "isPartOfKey": false }, @@ -7490,7 +7490,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=2)", + "nativeDataType": "CHAR(2)", "recursive": false, "isPartOfKey": false }, @@ -7503,7 +7503,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -7516,7 +7516,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -7529,7 +7529,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false } @@ -7686,7 +7686,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -7699,7 +7699,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=16)", + "nativeDataType": "VARCHAR(16)", "recursive": false, "isPartOfKey": false }, @@ -7712,7 +7712,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=256)", + "nativeDataType": "VARCHAR(256)", "recursive": false, "isPartOfKey": false }, @@ -7725,7 +7725,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=8)", + "nativeDataType": "VARCHAR(8)", "recursive": false, "isPartOfKey": false }, @@ -7738,7 +7738,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": 
"VARCHAR(length=8)", + "nativeDataType": "VARCHAR(8)", "recursive": false, "isPartOfKey": false }, @@ -7751,7 +7751,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -7764,7 +7764,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=256)", + "nativeDataType": "VARCHAR(256)", "recursive": false, "isPartOfKey": false }, @@ -7777,7 +7777,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, "isPartOfKey": false }, @@ -7790,7 +7790,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=2)", + "nativeDataType": "CHAR(2)", "recursive": false, "isPartOfKey": false }, @@ -7803,7 +7803,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, "isPartOfKey": false }, @@ -7816,7 +7816,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -7829,7 +7829,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -7842,7 +7842,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -7855,7 +7855,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -7868,7 +7868,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, 
"isPartOfKey": false }, @@ -7881,7 +7881,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -7894,7 +7894,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -7907,7 +7907,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -7920,7 +7920,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -7933,7 +7933,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -7946,7 +7946,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false } @@ -8103,7 +8103,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -8116,7 +8116,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=8)", + "nativeDataType": "VARCHAR(8)", "recursive": false, "isPartOfKey": false }, @@ -8129,7 +8129,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=8)", + "nativeDataType": "VARCHAR(8)", "recursive": false, "isPartOfKey": false }, @@ -8142,7 +8142,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, "isPartOfKey": false }, @@ -8155,7 +8155,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - 
"nativeDataType": "VARCHAR(length=8)", + "nativeDataType": "VARCHAR(8)", "recursive": false, "isPartOfKey": false }, @@ -8168,7 +8168,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, "isPartOfKey": false }, @@ -8181,7 +8181,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -8194,7 +8194,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -8207,7 +8207,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=256)", + "nativeDataType": "VARCHAR(256)", "recursive": false, "isPartOfKey": false }, @@ -8220,7 +8220,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, "isPartOfKey": false }, @@ -8233,7 +8233,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=2)", + "nativeDataType": "CHAR(2)", "recursive": false, "isPartOfKey": false }, @@ -8246,7 +8246,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "CHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -8259,7 +8259,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, "isPartOfKey": false }, @@ -8272,7 +8272,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -8285,7 +8285,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": 
false, "isPartOfKey": false }, @@ -8298,7 +8298,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -8311,7 +8311,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "FLOAT()", + "nativeDataType": "FLOAT", "recursive": false, "isPartOfKey": false }, @@ -8324,7 +8324,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false } @@ -8481,7 +8481,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -8494,7 +8494,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=20)", + "nativeDataType": "VARCHAR(20)", "recursive": false, "isPartOfKey": false }, @@ -8507,7 +8507,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=256)", + "nativeDataType": "VARCHAR(256)", "recursive": false, "isPartOfKey": false }, @@ -8520,7 +8520,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=60)", + "nativeDataType": "VARCHAR(60)", "recursive": false, "isPartOfKey": false }, @@ -8533,7 +8533,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=2)", + "nativeDataType": "CHAR(2)", "recursive": false, "isPartOfKey": false }, @@ -8546,7 +8546,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false } @@ -8703,7 +8703,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -8716,7 +8716,7 @@ 
"com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=30)", + "nativeDataType": "CHAR(30)", "recursive": false, "isPartOfKey": false }, @@ -8729,7 +8729,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=10)", + "nativeDataType": "CHAR(10)", "recursive": false, "isPartOfKey": false }, @@ -8742,7 +8742,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=20)", + "nativeDataType": "CHAR(20)", "recursive": false, "isPartOfKey": false } @@ -8899,7 +8899,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -8912,7 +8912,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -8925,7 +8925,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -8938,7 +8938,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -8951,7 +8951,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -8964,7 +8964,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false } @@ -9121,7 +9121,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -9134,7 +9134,7 @@ "com.linkedin.pegasus2avro.schema.TimeType": {} } }, - "nativeDataType": "TIMESTAMP_WITH_PRECISION()", + "nativeDataType": "TIMESTAMP", 
"recursive": false, "isPartOfKey": false }, @@ -9147,7 +9147,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "FLOAT()", + "nativeDataType": "FLOAT", "recursive": false, "isPartOfKey": false } @@ -9304,7 +9304,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false } @@ -9461,7 +9461,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=128)", + "nativeDataType": "VARCHAR(128)", "recursive": false, "isPartOfKey": false } @@ -9709,7 +9709,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -9722,7 +9722,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, "isPartOfKey": false }, @@ -9735,7 +9735,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -9748,7 +9748,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=256)", + "nativeDataType": "VARCHAR(256)", "recursive": false, "isPartOfKey": false }, @@ -9761,7 +9761,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, "isPartOfKey": false }, @@ -9774,7 +9774,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=2)", + "nativeDataType": "CHAR(2)", "recursive": false, "isPartOfKey": false }, @@ -9787,7 +9787,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, "isPartOfKey": false }, @@ -9800,7 +9800,7 @@ 
"com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -9813,7 +9813,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -9826,7 +9826,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -9839,7 +9839,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -9852,7 +9852,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -9865,7 +9865,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -9878,7 +9878,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -9891,7 +9891,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -9904,7 +9904,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -9917,7 +9917,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -9930,7 +9930,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", 
"recursive": false, "isPartOfKey": false } @@ -10087,7 +10087,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -10100,7 +10100,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -10113,7 +10113,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -10126,7 +10126,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -10139,7 +10139,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -10152,7 +10152,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -10165,7 +10165,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -10178,7 +10178,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -10191,7 +10191,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -10204,7 +10204,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -10217,7 +10217,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - 
"nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -10230,7 +10230,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -10243,7 +10243,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=16)", + "nativeDataType": "VARCHAR(16)", "recursive": false, "isPartOfKey": false }, @@ -10256,7 +10256,7 @@ "com.linkedin.pegasus2avro.schema.TimeType": {} } }, - "nativeDataType": "TIME()", + "nativeDataType": "TIME", "recursive": false, "isPartOfKey": false }, @@ -10269,7 +10269,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=8)", + "nativeDataType": "VARCHAR(8)", "recursive": false, "isPartOfKey": false }, @@ -10282,7 +10282,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -10295,7 +10295,7 @@ "com.linkedin.pegasus2avro.schema.TimeType": {} } }, - "nativeDataType": "TIMESTAMP_WITH_PRECISION()", + "nativeDataType": "TIMESTAMP", "recursive": false, "isPartOfKey": false } @@ -10452,7 +10452,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -10465,7 +10465,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -10478,7 +10478,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -10491,7 +10491,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, 
@@ -10504,7 +10504,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -10517,7 +10517,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -10530,7 +10530,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -10543,7 +10543,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -10556,7 +10556,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -10569,7 +10569,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -10582,7 +10582,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -10595,7 +10595,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -10608,7 +10608,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(32)", "recursive": false, "isPartOfKey": false }, @@ -10621,7 +10621,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -10634,7 +10634,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", 
"recursive": false, "isPartOfKey": false }, @@ -10647,7 +10647,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -10660,7 +10660,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -10673,7 +10673,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -10686,7 +10686,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false } @@ -10934,7 +10934,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -10947,7 +10947,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -10960,7 +10960,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -10973,7 +10973,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -10986,7 +10986,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=100)", + "nativeDataType": "VARCHAR(100)", "recursive": false, "isPartOfKey": false }, @@ -10999,7 +10999,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=100)", + "nativeDataType": "VARCHAR(100)", "recursive": false, "isPartOfKey": false } @@ -11156,7 +11156,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} 
} }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -11169,7 +11169,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -11182,7 +11182,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -11195,7 +11195,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -11208,7 +11208,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(50)", "recursive": false, "isPartOfKey": false }, @@ -11221,7 +11221,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -11234,7 +11234,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=20)", + "nativeDataType": "CHAR(20)", "recursive": false, "isPartOfKey": false }, @@ -11247,7 +11247,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=40)", + "nativeDataType": "VARCHAR(40)", "recursive": false, "isPartOfKey": false }, @@ -11260,7 +11260,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=256)", + "nativeDataType": "VARCHAR(256)", "recursive": false, "isPartOfKey": false }, @@ -11273,7 +11273,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, "isPartOfKey": false }, @@ -11286,7 +11286,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=2)", + "nativeDataType": 
"CHAR(2)", "recursive": false, "isPartOfKey": false }, @@ -11299,7 +11299,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(64)", "recursive": false, "isPartOfKey": false } @@ -11456,7 +11456,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -11469,7 +11469,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -11482,7 +11482,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -11495,7 +11495,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -11508,7 +11508,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -11521,7 +11521,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -11534,7 +11534,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -11547,7 +11547,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -11560,7 +11560,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -11573,7 +11573,7 @@ 
"com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -11586,7 +11586,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -11599,7 +11599,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "INTEGER", "recursive": false, "isPartOfKey": false }, @@ -11612,7 +11612,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "FLOAT()", + "nativeDataType": "FLOAT", "recursive": false, "isPartOfKey": false }, @@ -11625,7 +11625,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "FLOAT()", + "nativeDataType": "FLOAT", "recursive": false, "isPartOfKey": false }, @@ -11638,7 +11638,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "FLOAT()", + "nativeDataType": "FLOAT", "recursive": false, "isPartOfKey": false }, @@ -11651,7 +11651,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "FLOAT()", + "nativeDataType": "FLOAT", "recursive": false, "isPartOfKey": false }, @@ -11664,7 +11664,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "FLOAT()", + "nativeDataType": "FLOAT", "recursive": false, "isPartOfKey": false }, @@ -11677,7 +11677,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=16)", + "nativeDataType": "VARCHAR(16)", "recursive": false, "isPartOfKey": false }, @@ -11690,7 +11690,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, "isPartOfKey": false }, @@ -11703,7 +11703,7 @@ "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "DATE", "recursive": false, 
"isPartOfKey": false } diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_alter_table_column.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_alter_table_column.json new file mode 100644 index 00000000000000..3c6c9737e8e193 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_alter_table_column.json @@ -0,0 +1,14 @@ +{ + "query_type": "UNKNOWN", + "query_type_props": {}, + "query_fingerprint": "7d04253c3add0194c557942ef9b7485f38e68762d300dad364b9cec8656035b3", + "in_tables": [], + "out_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-bq-project.covid_data.covid_deaths,PROD)" + ], + "column_lineage": null, + "debug_info": { + "confidence": 0.2, + "generalized_statement": "ALTER TABLE `my-bq-project.covid_data.covid_deaths` DROP COLUMN patient_name" + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_information_schema_query.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_information_schema_query.json index 4b9bbd06ecba65..f5f573f3d51136 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_information_schema_query.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_information_schema_query.json @@ -1,7 +1,7 @@ { "query_type": "SELECT", "query_type_props": {}, - "query_fingerprint": "772187d1c6ce8dbed2dd1ba79975b108d4e733015ffb7bcbf9b7146e64cf9914", + "query_fingerprint": "c721ce16410601b36e5f32bd9c5c28488500a93e617363739faebfe71496f163", "in_tables": [ "urn:li:dataset:(urn:li:dataPlatform:bigquery,acryl-staging-2.smoke_test_db_4.INFORMATION_SCHEMA.COLUMNS,PROD)", "urn:li:dataset:(urn:li:dataPlatform:bigquery,acryl-staging-2.smoke_test_db_4.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS,PROD)" @@ -178,6 +178,6 @@ ], "debug_info": { "confidence": 0.2, - "generalized_statement": "SELECT c.table_catalog AS table_catalog, c.table_schema AS table_schema, 
c.table_name AS table_name, c.column_name AS column_name, c.ordinal_position AS ordinal_position, cfp.field_path AS field_path, c.is_nullable AS is_nullable, CASE WHEN CONTAINS_SUBSTR(field_path, ?) THEN NULL ELSE c.data_type END AS data_type, description AS comment, c.is_hidden AS is_hidden, c.is_partitioning_column AS is_partitioning_column, c.clustering_ordinal_position AS clustering_ordinal_position FROM `acryl-staging-2`.`smoke_test_db_4`.INFORMATION_SCHEMA.COLUMNS AS c JOIN `acryl-staging-2`.`smoke_test_db_4`.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS AS cfp ON cfp.table_name = c.table_name AND cfp.column_name = c.column_name ORDER BY table_catalog, table_schema, table_name, ordinal_position ASC, data_type DESC" + "generalized_statement": "SELECT c.table_catalog AS table_catalog, c.table_schema AS table_schema, c.table_name AS table_name, c.column_name AS column_name, c.ordinal_position AS ordinal_position, cfp.field_path AS field_path, c.is_nullable AS is_nullable, CASE WHEN CONTAINS_SUBSTR(cfp.field_path, ?) 
THEN NULL ELSE c.data_type END AS data_type, description AS comment, c.is_hidden AS is_hidden, c.is_partitioning_column AS is_partitioning_column, c.clustering_ordinal_position AS clustering_ordinal_position FROM `acryl-staging-2`.`smoke_test_db_4`.INFORMATION_SCHEMA.COLUMNS AS c JOIN `acryl-staging-2`.`smoke_test_db_4`.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS AS cfp ON cfp.table_name = c.table_name AND cfp.column_name = c.column_name ORDER BY table_catalog, table_schema, table_name, ordinal_position ASC, data_type DESC" } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_merge_from_union.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_merge_from_union.json index 4ba44d9e54c9db..1a75dde4c634f2 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_merge_from_union.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_merge_from_union.json @@ -1,7 +1,7 @@ { "query_type": "MERGE", "query_type_props": {}, - "query_fingerprint": "38a78af8cc48333df0e4de7d6af5b9507a87dd8a2f129ef97c9b06dce2ca7b9f", + "query_fingerprint": "8001b852498d94a7f0f532dcd8cfa05328981ba437df6314466c764cc408969c", "in_tables": [ "urn:li:dataset:(urn:li:dataPlatform:bigquery,demo-pipelines-stg.referrer.prep_from_ios,PROD)", "urn:li:dataset:(urn:li:dataPlatform:bigquery,demo-pipelines-stg.referrer.prep_from_web,PROD)" @@ -12,6 +12,6 @@ "column_lineage": null, "debug_info": { "confidence": 0.2, - "generalized_statement": "MERGE INTO `demo-pipelines-stg`.`referrer`.`base_union` AS DBT_INTERNAL_DEST USING (SELECT * FROM `demo-pipelines-stg`.`referrer`.`prep_from_ios` WHERE partition_time = ? UNION ALL SELECT * FROM `demo-pipelines-stg`.`referrer`.`prep_from_web` WHERE partition_time = ?) 
AS DBT_INTERNAL_SOURCE ON FALSE WHEN NOT MATCHED BY SOURCE AND timestamp_trunc(DBT_INTERNAL_DEST.partition_time, DAY) IN (TIMESTAMP(?)) THEN delete WHEN NOT MATCHED THEN INSERT (`platform`, `pageview_id`, `query`, `referrer`, `partition_time`) VALUES (`platform`, `pageview_id`, `query`, `referrer`, `partition_time`)" + "generalized_statement": "MERGE INTO `demo-pipelines-stg`.`referrer`.`base_union` AS DBT_INTERNAL_DEST USING (SELECT * FROM `demo-pipelines-stg`.`referrer`.`prep_from_ios` WHERE partition_time = ? UNION ALL SELECT * FROM `demo-pipelines-stg`.`referrer`.`prep_from_web` WHERE partition_time = ?) AS DBT_INTERNAL_SOURCE ON FALSE WHEN NOT MATCHED BY SOURCE AND timestamp_trunc(DBT_INTERNAL_DEST.partition_time, DAY) IN (timestamp(?)) THEN delete WHEN NOT MATCHED THEN INSERT (`platform`, `pageview_id`, `query`, `referrer`, `partition_time`) VALUES (`platform`, `pageview_id`, `query`, `referrer`, `partition_time`)" } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_drop_schema.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_drop_schema.json new file mode 100644 index 00000000000000..2784b8e9543b28 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_drop_schema.json @@ -0,0 +1,12 @@ +{ + "query_type": "UNKNOWN", + "query_type_props": {}, + "query_fingerprint": "4eefab57619a812a94030acce0071857561265945e79d798563adb53bd0b9646", + "in_tables": [], + "out_tables": [], + "column_lineage": null, + "debug_info": { + "confidence": 0.9, + "generalized_statement": "DROP SCHEMA my_schema" + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_sqlite_drop_table.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_sqlite_drop_table.json new file mode 100644 index 00000000000000..ae8b3f99897dc7 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_sqlite_drop_table.json @@ -0,0 
+1,14 @@ +{ + "query_type": "UNKNOWN", + "query_type_props": {}, + "query_fingerprint": "d1c29ad73325b08bb66e62ec00ba1d5be4412ec72b4bbc9c094f1272b9da4f86", + "in_tables": [], + "out_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,my_schema.my_table,PROD)" + ], + "column_lineage": null, + "debug_info": { + "confidence": 0.2, + "generalized_statement": "DROP TABLE my_schema.my_table" + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_sqlite_drop_view.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_sqlite_drop_view.json new file mode 100644 index 00000000000000..6650ef396a5705 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_sqlite_drop_view.json @@ -0,0 +1,14 @@ +{ + "query_type": "UNKNOWN", + "query_type_props": {}, + "query_fingerprint": "35a3c60e7ed98884dde3f1f5fe9079f844832430589a3326b97d617b8303f191", + "in_tables": [], + "out_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,my_schema.my_view,PROD)" + ], + "column_lineage": null, + "debug_info": { + "confidence": 0.2, + "generalized_statement": "DROP VIEW my_schema.my_view" + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py index e5b669329f16c4..1bd634e9d10ec3 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py @@ -2,11 +2,22 @@ import pytest +import datahub.testing.check_sql_parser_result as checker from datahub.testing.check_sql_parser_result import assert_sql_result RESOURCE_DIR = pathlib.Path(__file__).parent / "goldens" +@pytest.fixture(autouse=True) +def set_update_sql_parser( + pytestconfig: pytest.Config, monkeypatch: pytest.MonkeyPatch +) -> None: + update_golden = pytestconfig.getoption("--update-golden-files") + + if update_golden: + monkeypatch.setattr(checker, 
"UPDATE_FILES", True) + + def test_invalid_sql(): assert_sql_result( """ @@ -1188,7 +1199,7 @@ def test_bigquery_information_schema_query() -> None: c.ordinal_position as ordinal_position, cfp.field_path as field_path, c.is_nullable as is_nullable, - CASE WHEN CONTAINS_SUBSTR(field_path, ".") THEN NULL ELSE c.data_type END as data_type, + CASE WHEN CONTAINS_SUBSTR(cfp.field_path, ".") THEN NULL ELSE c.data_type END as data_type, description as comment, c.is_hidden as is_hidden, c.is_partitioning_column as is_partitioning_column, @@ -1202,3 +1213,43 @@ def test_bigquery_information_schema_query() -> None: dialect="bigquery", expected_file=RESOURCE_DIR / "test_bigquery_information_schema_query.json", ) + + +def test_bigquery_alter_table_column() -> None: + assert_sql_result( + """\ +ALTER TABLE `my-bq-project.covid_data.covid_deaths` drop COLUMN patient_name + """, + dialect="bigquery", + expected_file=RESOURCE_DIR / "test_bigquery_alter_table_column.json", + ) + + +def test_sqlite_drop_table() -> None: + assert_sql_result( + """\ +DROP TABLE my_schema.my_table +""", + dialect="sqlite", + expected_file=RESOURCE_DIR / "test_sqlite_drop_table.json", + ) + + +def test_sqlite_drop_view() -> None: + assert_sql_result( + """\ +DROP VIEW my_schema.my_view +""", + dialect="sqlite", + expected_file=RESOURCE_DIR / "test_sqlite_drop_view.json", + ) + + +def test_snowflake_drop_schema() -> None: + assert_sql_result( + """\ +DROP SCHEMA my_schema +""", + dialect="snowflake", + expected_file=RESOURCE_DIR / "test_snowflake_drop_schema.json", + ) diff --git a/metadata-ingestion/tests/unit/test_dbt_source.py b/metadata-ingestion/tests/unit/test_dbt_source.py index 01d7a4809b01b8..90ff78b16f652b 100644 --- a/metadata-ingestion/tests/unit/test_dbt_source.py +++ b/metadata-ingestion/tests/unit/test_dbt_source.py @@ -247,7 +247,6 @@ def test_dbt_config_prefer_sql_parser_lineage(): "catalog_path": "dummy_path", "target_platform": "dummy_platform", "skip_sources_in_lineage": True, - 
"entities_enabled": {"sources": "NO"}, "prefer_sql_parser_lineage": True, } config = DBTCoreConfig.parse_obj(config_dict) diff --git a/metadata-ingestion/tests/unit/test_tableau_source.py b/metadata-ingestion/tests/unit/test_tableau_source.py index f5410b161ed703..1cd0557d085f19 100644 --- a/metadata-ingestion/tests/unit/test_tableau_source.py +++ b/metadata-ingestion/tests/unit/test_tableau_source.py @@ -1,8 +1,37 @@ +from typing import Any, Dict + import pytest import datahub.ingestion.source.tableau_constant as c from datahub.ingestion.source.tableau import TableauSiteSource -from datahub.ingestion.source.tableau_common import get_filter_pages, make_filter +from datahub.ingestion.source.tableau_common import ( + get_filter_pages, + make_filter, + tableau_field_to_schema_field, +) +from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField + + +def test_tablea_source_handles_none_nativedatatype(): + field: Dict[str, Any] = { + "__typename": "CalculatedField", + "id": "abcd", + "name": "Test Field", + "description": None, + "isHidden": False, + "folderName": None, + "upstreamFields": [], + "upstreamColumns": [], + "role": None, + "dataType": None, + "defaultFormat": "s", + "aggregation": None, + "formula": "a/b + d", + } + schema_field: SchemaField = tableau_field_to_schema_field( + field=field, ingest_tags=False + ) + assert schema_field.nativeDataType == "UNKNOWN" def test_tableau_source_unescapes_lt(): diff --git a/metadata-ingestion/tests/unit/utilities/test_sqlalchemy_type_converter.py b/metadata-ingestion/tests/unit/utilities/test_sqlalchemy_type_converter.py index 6c719d351c4c20..b080819cea95be 100644 --- a/metadata-ingestion/tests/unit/utilities/test_sqlalchemy_type_converter.py +++ b/metadata-ingestion/tests/unit/utilities/test_sqlalchemy_type_converter.py @@ -1,6 +1,8 @@ from typing import no_type_check +from unittest.mock import MagicMock from sqlalchemy import types +from sqlalchemy.engine.default import DefaultDialect from 
sqlalchemy_bigquery import STRUCT from datahub.metadata.schema_classes import ( @@ -17,8 +19,11 @@ def test_get_avro_schema_for_sqlalchemy_column(): + inspector_magic_mock = MagicMock() + inspector_magic_mock.dialect = DefaultDialect() + schema_fields = get_schema_fields_for_sqlalchemy_column( - column_name="test", column_type=types.INTEGER() + column_name="test", column_type=types.INTEGER(), inspector=inspector_magic_mock ) assert len(schema_fields) == 1 assert schema_fields[0].fieldPath == "[version=2.0].[type=int].test" @@ -27,7 +32,10 @@ def test_get_avro_schema_for_sqlalchemy_column(): assert schema_fields[0].nullable is True schema_fields = get_schema_fields_for_sqlalchemy_column( - column_name="test", column_type=types.String(), nullable=False + column_name="test", + column_type=types.String(), + nullable=False, + inspector=inspector_magic_mock, ) assert len(schema_fields) == 1 assert schema_fields[0].fieldPath == "[version=2.0].[type=string].test" @@ -37,8 +45,13 @@ def test_get_avro_schema_for_sqlalchemy_column(): def test_get_avro_schema_for_sqlalchemy_array_column(): + inspector_magic_mock = MagicMock() + inspector_magic_mock.dialect = DefaultDialect() + schema_fields = get_schema_fields_for_sqlalchemy_column( - column_name="test", column_type=types.ARRAY(types.FLOAT()) + column_name="test", + column_type=types.ARRAY(types.FLOAT()), + inspector=inspector_magic_mock, ) assert len(schema_fields) == 1 assert ( @@ -50,8 +63,13 @@ def test_get_avro_schema_for_sqlalchemy_array_column(): def test_get_avro_schema_for_sqlalchemy_map_column(): + inspector_magic_mock = MagicMock() + inspector_magic_mock.dialect = DefaultDialect() + schema_fields = get_schema_fields_for_sqlalchemy_column( - column_name="test", column_type=MapType(types.String(), types.BOOLEAN()) + column_name="test", + column_type=MapType(types.String(), types.BOOLEAN()), + inspector=inspector_magic_mock, ) assert len(schema_fields) == 1 assert ( @@ -65,9 +83,13 @@ def 
test_get_avro_schema_for_sqlalchemy_map_column(): def test_get_avro_schema_for_sqlalchemy_struct_column() -> None: + inspector_magic_mock = MagicMock() + inspector_magic_mock.dialect = DefaultDialect() schema_fields = get_schema_fields_for_sqlalchemy_column( - column_name="test", column_type=STRUCT(("test", types.INTEGER())) + column_name="test", + column_type=STRUCT(("test", types.INTEGER())), + inspector=inspector_magic_mock, ) assert len(schema_fields) == 2 assert ( @@ -86,7 +108,12 @@ def test_get_avro_schema_for_sqlalchemy_struct_column() -> None: @no_type_check def test_get_avro_schema_for_sqlalchemy_unknown_column(): - schema_fields = get_schema_fields_for_sqlalchemy_column("invalid", "test") + inspector_magic_mock = MagicMock() + inspector_magic_mock.dialect = DefaultDialect() + + schema_fields = get_schema_fields_for_sqlalchemy_column( + "invalid", "test", inspector=inspector_magic_mock + ) assert len(schema_fields) == 1 assert schema_fields[0].type.type == NullTypeClass() assert schema_fields[0].fieldPath == "[version=2.0].[type=null]" diff --git a/metadata-integration/java/datahub-client/src/main/java/datahub/client/kafka/KafkaEmitter.java b/metadata-integration/java/datahub-client/src/main/java/datahub/client/kafka/KafkaEmitter.java index a9340d18749ade..d00dc09669045f 100644 --- a/metadata-integration/java/datahub-client/src/main/java/datahub/client/kafka/KafkaEmitter.java +++ b/metadata-integration/java/datahub-client/src/main/java/datahub/client/kafka/KafkaEmitter.java @@ -160,7 +160,7 @@ private static MetadataWriteResponse mapResponse(RecordMetadata metadata, Except return builder.build(); } - public Properties getKafkaConfgiProperties() { + public Properties getKafkaConfigProperties() { return kafkaConfigProperties; } } diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/EntityApiUtils.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/EntityApiUtils.java index 
656534e24f551f..fa6d9cae45a6a4 100644 --- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/EntityApiUtils.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/EntityApiUtils.java @@ -1,6 +1,6 @@ package com.linkedin.metadata.entity; -import static com.linkedin.metadata.Constants.DEFAULT_RUN_ID; +import static com.linkedin.metadata.utils.SystemMetadataUtils.createDefaultSystemMetadata; import com.datahub.util.RecordUtils; import com.linkedin.common.urn.Urn; @@ -37,10 +37,7 @@ public static RecordTemplate buildKeyAspect( public static SystemMetadata parseSystemMetadata(String jsonSystemMetadata) { if (jsonSystemMetadata == null || jsonSystemMetadata.equals("")) { - SystemMetadata response = new SystemMetadata(); - response.setRunId(DEFAULT_RUN_ID); - response.setLastObserved(0); - return response; + return createDefaultSystemMetadata(); } return RecordUtils.toRecordTemplate(SystemMetadata.class, jsonSystemMetadata); } diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java index a23f6ab175046b..3ec090a3db3a45 100644 --- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java @@ -9,6 +9,7 @@ import com.linkedin.metadata.aspect.batch.BatchItem; import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.aspect.batch.MCPItem; +import com.linkedin.metadata.aspect.plugins.hooks.MutationHook; import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.util.Pair; @@ -47,7 +48,7 @@ public Pair>, List> toUpsertBatchItems( final Map> latestAspects) { // Process proposals to change items 
- Stream mutatedProposalsStream = + Stream mutatedProposalsStream = proposedItemsToChangeItemStream( items.stream() .filter(item -> item instanceof ProposedItem) @@ -92,21 +93,58 @@ public Pair>, List> toUpsertBatchItems( LinkedList newItems = applyMCPSideEffects(upsertBatchItems).collect(Collectors.toCollection(LinkedList::new)); - Map> newUrnAspectNames = getNewUrnAspectsMap(getUrnAspectsMap(), newItems); upsertBatchItems.addAll(newItems); + Map> newUrnAspectNames = + getNewUrnAspectsMap(getUrnAspectsMap(), upsertBatchItems); return Pair.of(newUrnAspectNames, upsertBatchItems); } - private Stream proposedItemsToChangeItemStream(List proposedItems) { - return applyProposalMutationHooks(proposedItems, retrieverContext) - .filter(mcpItem -> mcpItem.getMetadataChangeProposal() != null) - .map( - mcpItem -> - ChangeItemImpl.ChangeItemImplBuilder.build( - mcpItem.getMetadataChangeProposal(), - mcpItem.getAuditStamp(), - retrieverContext.getAspectRetriever())); + private Stream proposedItemsToChangeItemStream(List proposedItems) { + List mutationHooks = + retrieverContext.getAspectRetriever().getEntityRegistry().getAllMutationHooks(); + Stream unmutatedItems = + proposedItems.stream() + .filter( + proposedItem -> + mutationHooks.stream() + .noneMatch( + mutationHook -> + mutationHook.shouldApply( + proposedItem.getChangeType(), + proposedItem.getUrn(), + proposedItem.getAspectName()))) + .map( + mcpItem -> { + if (ChangeType.PATCH.equals(mcpItem.getChangeType())) { + return PatchItemImpl.PatchItemImplBuilder.build( + mcpItem.getMetadataChangeProposal(), + mcpItem.getAuditStamp(), + retrieverContext.getAspectRetriever().getEntityRegistry()); + } + return ChangeItemImpl.ChangeItemImplBuilder.build( + mcpItem.getMetadataChangeProposal(), + mcpItem.getAuditStamp(), + retrieverContext.getAspectRetriever()); + }); + List mutatedItems = + applyProposalMutationHooks(proposedItems, retrieverContext).collect(Collectors.toList()); + Stream proposedItemsToChangeItems = + 
mutatedItems.stream() + .filter(mcpItem -> mcpItem.getMetadataChangeProposal() != null) + // Filter on proposed items again to avoid applying builder to Patch Item side effects + .filter(mcpItem -> mcpItem instanceof ProposedItem) + .map( + mcpItem -> + ChangeItemImpl.ChangeItemImplBuilder.build( + mcpItem.getMetadataChangeProposal(), + mcpItem.getAuditStamp(), + retrieverContext.getAspectRetriever())); + Stream sideEffectItems = + mutatedItems.stream().filter(mcpItem -> !(mcpItem instanceof ProposedItem)); + Stream combinedChangeItems = + Stream.concat(proposedItemsToChangeItems, unmutatedItems); + return Stream.concat(combinedChangeItems, sideEffectItems); } public static class AspectsBatchImplBuilder { @@ -123,7 +161,7 @@ public AspectsBatchImplBuilder one(BatchItem data, RetrieverContext retrieverCon } public AspectsBatchImplBuilder mcps( - List mcps, + Collection mcps, AuditStamp auditStamp, RetrieverContext retrieverContext) { @@ -132,16 +170,22 @@ public AspectsBatchImplBuilder mcps( mcps.stream() .map( mcp -> { - if (mcp.getChangeType().equals(ChangeType.PATCH)) { - return PatchItemImpl.PatchItemImplBuilder.build( - mcp, - auditStamp, - retrieverContext.getAspectRetriever().getEntityRegistry()); - } else { - return ChangeItemImpl.ChangeItemImplBuilder.build( - mcp, auditStamp, retrieverContext.getAspectRetriever()); + try { + if (mcp.getChangeType().equals(ChangeType.PATCH)) { + return PatchItemImpl.PatchItemImplBuilder.build( + mcp, + auditStamp, + retrieverContext.getAspectRetriever().getEntityRegistry()); + } else { + return ChangeItemImpl.ChangeItemImplBuilder.build( + mcp, auditStamp, retrieverContext.getAspectRetriever()); + } + } catch (IllegalArgumentException e) { + log.error("Invalid proposal, skipping and proceeding with batch: " + mcp, e); + return null; } }) + .filter(Objects::nonNull) .collect(Collectors.toList())); return this; } diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ProposedItem.java 
b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ProposedItem.java index 452ed39ddf3174..132a731d278af8 100644 --- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ProposedItem.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ProposedItem.java @@ -7,6 +7,7 @@ import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.mxe.SystemMetadata; @@ -63,7 +64,12 @@ public RecordTemplate getRecordTemplate() { @Nonnull @Override public Urn getUrn() { - return metadataChangeProposal.getEntityUrn(); + Urn urn = metadataChangeProposal.getEntityUrn(); + if (urn == null) { + urn = + EntityKeyUtils.getUrnFromProposal(metadataChangeProposal, entitySpec.getKeyAspectSpec()); + } + return urn; } @Nullable diff --git a/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImplTest.java b/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImplTest.java index d2e7243d045604..31dd868b4cb4a3 100644 --- a/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImplTest.java +++ b/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImplTest.java @@ -1,22 +1,26 @@ package com.linkedin.metadata.entity.ebean.batch; -import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; -import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; -import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTIES_ASPECT_NAME; +import static com.linkedin.metadata.Constants.*; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; 
import static org.testng.Assert.assertEquals; +import com.google.common.collect.ImmutableList; +import com.linkedin.common.FabricType; import com.linkedin.common.Status; +import com.linkedin.common.urn.DataPlatformUrn; +import com.linkedin.common.urn.DatasetUrn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.ByteString; import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; +import com.linkedin.dataset.DatasetProperties; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.patch.GenericJsonPatch; import com.linkedin.metadata.aspect.patch.PatchOperationType; +import com.linkedin.metadata.aspect.patch.builder.DatasetPropertiesPatchBuilder; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import com.linkedin.metadata.aspect.plugins.hooks.MutationHook; import com.linkedin.metadata.entity.SearchRetriever; @@ -297,6 +301,38 @@ public void toUpsertBatchItemsProposedItemTest() { "Mutation to status aspect"); } + @Test + public void singleInvalidDoesntBreakBatch() { + MetadataChangeProposal proposal1 = + new DatasetPropertiesPatchBuilder() + .urn(new DatasetUrn(new DataPlatformUrn("platform"), "name", FabricType.PROD)) + .setDescription("something") + .setName("name") + .addCustomProperty("prop1", "propVal1") + .addCustomProperty("prop2", "propVal2") + .build(); + MetadataChangeProposal proposal2 = + new MetadataChangeProposal() + .setEntityType(DATASET_ENTITY_NAME) + .setAspectName(DATASET_PROPERTIES_ASPECT_NAME) + .setAspect(GenericRecordUtils.serializeAspect(new DatasetProperties())) + .setChangeType(ChangeType.UPSERT); + + AspectsBatchImpl testBatch = + AspectsBatchImpl.builder() + .mcps( + ImmutableList.of(proposal1, proposal2), + AuditStampUtils.createDefaultAuditStamp(), + retrieverContext) + 
.retrieverContext(retrieverContext) + .build(); + + assertEquals( + testBatch.toUpsertBatchItems(Map.of()).getSecond().size(), + 1, + "Expected 1 valid mcp to be passed through."); + } + /** Converts unsupported to status aspect */ @Getter @Setter diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java index 337288ab59c603..60a991c19ae8bf 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java @@ -12,7 +12,6 @@ import com.linkedin.common.AuditStamp; import com.linkedin.common.VersionedUrn; import com.linkedin.common.urn.Urn; -import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.DataMap; import com.linkedin.data.template.RecordTemplate; import com.linkedin.data.template.StringArray; @@ -24,6 +23,7 @@ import com.linkedin.metadata.aspect.EnvelopedAspectArray; import com.linkedin.metadata.aspect.VersionedAspect; import com.linkedin.metadata.aspect.batch.AspectsBatch; +import com.linkedin.metadata.aspect.batch.BatchItem; import com.linkedin.metadata.browse.BrowseResult; import com.linkedin.metadata.browse.BrowseResultV2; import com.linkedin.metadata.entity.DeleteEntityService; @@ -48,6 +48,7 @@ import com.linkedin.metadata.search.client.CachingEntitySearchService; import com.linkedin.metadata.service.RollbackService; import com.linkedin.metadata.timeseries.TimeseriesAspectService; +import com.linkedin.metadata.utils.AuditStampUtils; import com.linkedin.metadata.utils.metrics.MetricUtils; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.mxe.PlatformEvent; @@ -60,6 +61,8 @@ import java.net.URISyntaxException; import java.time.Clock; import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -68,6 +71,7 @@ 
import java.util.Optional; import java.util.Set; import java.util.function.Supplier; +import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; @@ -372,7 +376,13 @@ public SearchResult search( return ValidationUtils.validateSearchResult( opContext, entitySearchService.search( - opContext, List.of(entity), input, newFilter(requestFilters), null, start, count), + opContext, + List.of(entity), + input, + newFilter(requestFilters), + Collections.emptyList(), + start, + count), entityService); } @@ -403,7 +413,7 @@ public ListResult list( opContext.withSearchFlags(flags -> flags.setFulltext(false)), entity, newFilter(requestFilters), - null, + Collections.emptyList(), start, count)), entityService); @@ -414,7 +424,7 @@ public ListResult list( * * @param input search query * @param filter search filters - * @param sortCriterion sort criterion + * @param sortCriteria sort criteria * @param start start offset for search results * @param count max number of search results requested * @return Snapshot key @@ -426,14 +436,14 @@ public SearchResult search( @Nonnull String entity, @Nonnull String input, @Nullable Filter filter, - @Nullable SortCriterion sortCriterion, + List sortCriteria, int start, int count) throws RemoteInvocationException { return ValidationUtils.validateSearchResult( opContext, entitySearchService.search( - opContext, List.of(entity), input, filter, sortCriterion, start, count), + opContext, List.of(entity), input, filter, sortCriteria, start, count), entityService); } @@ -446,10 +456,10 @@ public SearchResult searchAcrossEntities( @Nullable Filter filter, int start, int count, - @Nullable SortCriterion sortCriterion) + List sortCriteria) throws RemoteInvocationException { return searchAcrossEntities( - opContext, entities, input, filter, start, count, sortCriterion, null); + opContext, entities, input, filter, start, count, sortCriteria, null); } /** @@ -461,7 +471,7 @@ 
public SearchResult searchAcrossEntities( * @param start start offset for search results * @param count max number of search results requested * @param facets list of facets we want aggregations for - * @param sortCriterion sorting criterion + * @param sortCriteria sorting criteria * @return Snapshot key * @throws RemoteInvocationException when unable to execute request */ @@ -473,7 +483,7 @@ public SearchResult searchAcrossEntities( @Nullable Filter filter, int start, int count, - @Nullable SortCriterion sortCriterion, + List sortCriteria, @Nullable List facets) throws RemoteInvocationException { @@ -484,7 +494,7 @@ public SearchResult searchAcrossEntities( entities, input, filter, - sortCriterion, + sortCriteria, start, count, facets), @@ -526,7 +536,7 @@ public LineageSearchResult searchAcrossLineage( @Nullable String input, @Nullable Integer maxHops, @Nullable Filter filter, - @Nullable SortCriterion sortCriterion, + List sortCriteria, int start, int count) throws RemoteInvocationException { @@ -540,7 +550,7 @@ public LineageSearchResult searchAcrossLineage( input, maxHops, filter, - sortCriterion, + sortCriteria, start, count), entityService); @@ -556,7 +566,7 @@ public LineageScrollResult scrollAcrossLineage( @Nullable String input, @Nullable Integer maxHops, @Nullable Filter filter, - @Nullable SortCriterion sortCriterion, + List sortCriteria, @Nullable String scrollId, @Nonnull String keepAlive, int count) @@ -574,7 +584,7 @@ public LineageScrollResult scrollAcrossLineage( input, maxHops, filter, - sortCriterion, + sortCriteria, scrollId, keepAlive, count), @@ -642,7 +652,7 @@ public SearchResult filter( @Nonnull OperationContext opContext, @Nonnull String entity, @Nonnull Filter filter, - @Nullable SortCriterion sortCriterion, + List sortCriteria, int start, int count) throws RemoteInvocationException { @@ -652,7 +662,7 @@ public SearchResult filter( opContext.withSearchFlags(flags -> flags.setFulltext(true)), entity, filter, - sortCriterion, + 
sortCriteria, start, count), entityService); @@ -738,35 +748,54 @@ public List getTimeseriesAspectValues( return response.getValues(); } - // TODO: Factor out ingest logic into a util that can be accessed by the java client and the - // resource @Override - public String ingestProposal( + @Nonnull + public List batchIngestProposals( @Nonnull OperationContext opContext, - @Nonnull final MetadataChangeProposal metadataChangeProposal, - final boolean async) - throws RemoteInvocationException { + @Nonnull Collection metadataChangeProposals, + boolean async) { String actorUrnStr = opContext.getSessionAuthentication().getActor() != null ? opContext.getSessionAuthentication().getActor().toUrnStr() : Constants.UNKNOWN_ACTOR; - final AuditStamp auditStamp = - new AuditStamp().setTime(_clock.millis()).setActor(UrnUtils.getUrn(actorUrnStr)); + final AuditStamp auditStamp = AuditStampUtils.createAuditStamp(actorUrnStr); AspectsBatch batch = AspectsBatchImpl.builder() - .mcps( - List.of(metadataChangeProposal), auditStamp, opContext.getRetrieverContext().get()) + .mcps(metadataChangeProposals, auditStamp, opContext.getRetrieverContext().get()) .build(); - Optional one = - entityService.ingestProposal(opContext, batch, async).stream().findFirst(); + Map> resultMap = + entityService.ingestProposal(opContext, batch, async).stream() + .collect(Collectors.groupingBy(IngestResult::getRequest)); + + // Update runIds + batch.getItems().stream() + .filter(resultMap::containsKey) + .forEach( + requestItem -> { + List results = resultMap.get(requestItem); + Optional resultUrn = + results.stream().map(IngestResult::getUrn).filter(Objects::nonNull).findFirst(); + resultUrn.ifPresent( + urn -> tryIndexRunId(opContext, urn, requestItem.getSystemMetadata())); + }); - Urn urn = one.map(IngestResult::getUrn).orElse(metadataChangeProposal.getEntityUrn()); - if (one.isPresent()) { - tryIndexRunId(opContext, urn, metadataChangeProposal.getSystemMetadata()); - } - return urn.toString(); + // 
Preserve ordering + return batch.getItems().stream() + .map( + requestItem -> { + if (resultMap.containsKey(requestItem)) { + List results = resultMap.get(requestItem); + return results.stream() + .filter(r -> r.getUrn() != null) + .findFirst() + .map(r -> r.getUrn().toString()) + .orElse(null); + } + return null; + }) + .collect(Collectors.toList()); } @SneakyThrows diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index 8c83e7f469fe31..3e640365f3fd2a 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -2,7 +2,6 @@ import static com.linkedin.metadata.Constants.APP_SOURCE; import static com.linkedin.metadata.Constants.ASPECT_LATEST_VERSION; -import static com.linkedin.metadata.Constants.DEFAULT_RUN_ID; import static com.linkedin.metadata.Constants.FORCE_INDEXING_KEY; import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; import static com.linkedin.metadata.Constants.SYSTEM_ACTOR; @@ -10,6 +9,7 @@ import static com.linkedin.metadata.utils.PegasusUtils.constructMCL; import static com.linkedin.metadata.utils.PegasusUtils.getDataTemplateClassFromSchema; import static com.linkedin.metadata.utils.PegasusUtils.urnToEntityName; +import static com.linkedin.metadata.utils.SystemMetadataUtils.createDefaultSystemMetadata; import static com.linkedin.metadata.utils.metrics.ExceptionUtils.collectMetrics; import com.codahale.metrics.Timer; @@ -1835,10 +1835,7 @@ public void ingestEntities( @Override public SystemMetadata ingestEntity( @Nonnull OperationContext opContext, Entity entity, AuditStamp auditStamp) { - SystemMetadata generatedSystemMetadata = new SystemMetadata(); - generatedSystemMetadata.setRunId(DEFAULT_RUN_ID); - generatedSystemMetadata.setLastObserved(System.currentTimeMillis()); - + SystemMetadata 
generatedSystemMetadata = createDefaultSystemMetadata(); ingestEntity(opContext, entity, auditStamp, generatedSystemMetadata); return generatedSystemMetadata; } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java index 27b603244d3b35..6703e07bfd915c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java @@ -788,7 +788,7 @@ public RelatedEntitiesScrollResult scrollRelatedEntities( @Nonnull Filter destinationEntityFilter, @Nonnull List relationshipTypes, @Nonnull RelationshipFilter relationshipFilter, - @Nonnull List sortCriterion, + @Nonnull List sortCriteria, @Nullable String scrollId, int count, @Nullable Long startTimeMillis, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java index e21c4e15b4fc9c..50e5aa6ba893d4 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java @@ -1323,7 +1323,7 @@ public SearchResponse getSearchResponse( @Nullable final Filter destinationEntityFilter, @Nonnull final List relationshipTypes, @Nonnull final RelationshipFilter relationshipFilter, - @Nonnull List sortCriterion, + @Nonnull List sortCriteria, @Nullable String scrollId, int count) { @@ -1336,12 +1336,12 @@ public SearchResponse getSearchResponse( relationshipTypes, relationshipFilter); - return executeScrollSearchQuery(finalQuery, sortCriterion, scrollId, count); + return executeScrollSearchQuery(finalQuery, sortCriteria, scrollId, count); } private SearchResponse executeScrollSearchQuery( @Nonnull final QueryBuilder query, - @Nonnull List sortCriterion, + 
@Nonnull List sortCriteria, @Nullable String scrollId, final int count) { @@ -1357,7 +1357,7 @@ private SearchResponse executeScrollSearchQuery( searchSourceBuilder.size(count); searchSourceBuilder.query(query); - ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, List.of(), false); + ESUtils.buildSortOrder(searchSourceBuilder, sortCriteria, List.of(), false); searchRequest.source(searchSourceBuilder); ESUtils.setSearchAfter(searchSourceBuilder, sort, null, null); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java index adcc64fd2bea73..e1532ea4e26c06 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java @@ -64,6 +64,7 @@ public class ElasticSearchGraphService implements GraphService, ElasticSearchInd private final ESGraphWriteDAO _graphWriteDAO; private final ESGraphQueryDAO _graphReadDAO; private final ESIndexBuilder _indexBuilder; + private final String idHashAlgo; public static final String INDEX_NAME = "graph_service_v1"; private static final Map EMPTY_HASH = new HashMap<>(); @@ -125,7 +126,7 @@ public LineageRegistry getLineageRegistry() { @Override public void addEdge(@Nonnull final Edge edge) { - String docId = edge.toDocId(); + String docId = edge.toDocId(idHashAlgo); String edgeDocument = toDocument(edge); _graphWriteDAO.upsertDocument(docId, edgeDocument); } @@ -137,7 +138,7 @@ public void upsertEdge(@Nonnull final Edge edge) { @Override public void removeEdge(@Nonnull final Edge edge) { - String docId = edge.toDocId(); + String docId = edge.toDocId(idHashAlgo); _graphWriteDAO.deleteDocument(docId); } @@ -315,7 +316,7 @@ public RelatedEntitiesScrollResult scrollRelatedEntities( @Nullable Filter destinationEntityFilter, @Nonnull List relationshipTypes, 
@Nonnull RelationshipFilter relationshipFilter, - @Nonnull List sortCriterion, + @Nonnull List sortCriteria, @Nullable String scrollId, int count, @Nullable Long startTimeMillis, @@ -331,7 +332,7 @@ public RelatedEntitiesScrollResult scrollRelatedEntities( destinationEntityFilter, relationshipTypes, relationshipFilter, - sortCriterion, + sortCriteria, scrollId, count); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java index 16c0804538dd78..9fe9c242fe48c7 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java @@ -921,7 +921,7 @@ public RelatedEntitiesScrollResult scrollRelatedEntities( @Nonnull Filter destinationEntityFilter, @Nonnull List relationshipTypes, @Nonnull RelationshipFilter relationshipFilter, - @Nonnull List sortCriterion, + @Nonnull List sortCriteria, @Nullable String scrollId, int count, @Nullable Long startTimeMillis, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java index 95c8eb13beb937..d07882963e2811 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java @@ -118,7 +118,7 @@ public class LineageSearchService { * @param maxHops the maximum number of hops away to search for. 
If null, defaults to 1000 * @param inputFilters the request map with fields and values as filters to be applied to search * hits - * @param sortCriterion {@link SortCriterion} to be applied to search results + * @param sortCriteria list of {@link SortCriterion} to be applied to search results * @param from index to start the search from * @param size the number of search hits to return * @return a {@link LineageSearchResult} that contains a list of matched documents and related @@ -134,7 +134,7 @@ public LineageSearchResult searchAcrossLineage( @Nullable String input, @Nullable Integer maxHops, @Nullable Filter inputFilters, - @Nullable SortCriterion sortCriterion, + List sortCriteria, int from, int size) { @@ -255,7 +255,7 @@ public LineageSearchResult searchAcrossLineage( SearchUtils.removeCriteria( inputFilters, criterion -> criterion.getField().equals(DEGREE_FILTER_INPUT)); - if (canDoLightning(lineageRelationships, finalInput, reducedFilters, sortCriterion)) { + if (canDoLightning(lineageRelationships, finalInput, reducedFilters, sortCriteria)) { codePath = "lightning"; // use lightning approach to return lineage search results LineageSearchResult lineageSearchResult = @@ -276,7 +276,7 @@ public LineageSearchResult searchAcrossLineage( lineageRelationships, finalInput, reducedFilters, - sortCriterion, + sortCriteria, from, size); if (!lineageSearchResult.getEntities().isEmpty()) { @@ -303,7 +303,7 @@ boolean canDoLightning( List lineageRelationships, String input, Filter inputFilters, - SortCriterion sortCriterion) { + List sortCriteria) { boolean simpleFilters = inputFilters == null || inputFilters.getOr() == null @@ -318,7 +318,7 @@ boolean canDoLightning( return (lineageRelationships.size() > cacheConfiguration.getLightningThreshold()) && input.equals("*") && simpleFilters - && sortCriterion == null; + && CollectionUtils.isEmpty(sortCriteria); } @VisibleForTesting @@ -533,7 +533,7 @@ private LineageSearchResult getSearchResultInBatches( List 
lineageRelationships, @Nonnull String input, @Nullable Filter inputFilters, - @Nullable SortCriterion sortCriterion, + List sortCriteria, int from, int size) { @@ -566,7 +566,7 @@ private LineageSearchResult getSearchResultInBatches( entitiesToQuery, input, finalFilter, - sortCriterion, + sortCriteria, queryFrom, querySize), urnToRelationship); @@ -761,7 +761,7 @@ private LineageSearchEntity buildLineageSearchEntity( * @param maxHops the maximum number of hops away to search for. If null, defaults to 1000 * @param inputFilters the request map with fields and values as filters to be applied to search * hits - * @param sortCriterion {@link SortCriterion} to be applied to search results + * @param sortCriteria list of {@link SortCriterion} to be applied to search results * @param scrollId opaque scroll identifier to pass to search service * @param size the number of search hits to return * @return a {@link LineageSearchResult} that contains a list of matched documents and related @@ -777,7 +777,7 @@ public LineageScrollResult scrollAcrossLineage( @Nullable String input, @Nullable Integer maxHops, @Nullable Filter inputFilters, - @Nullable SortCriterion sortCriterion, + List sortCriteria, @Nullable String scrollId, @Nonnull String keepAlive, int size) { @@ -831,7 +831,7 @@ public LineageScrollResult scrollAcrossLineage( lineageRelationships, input != null ? 
input : "*", reducedFilters, - sortCriterion, + sortCriteria, scrollId, keepAlive, size); @@ -843,7 +843,7 @@ private LineageScrollResult getScrollResultInBatches( List lineageRelationships, @Nonnull String input, @Nullable Filter inputFilters, - @Nullable SortCriterion sortCriterion, + List sortCriteria, @Nullable String scrollId, @Nonnull String keepAlive, int size) { @@ -878,7 +878,7 @@ private LineageScrollResult getScrollResultInBatches( entitiesToQuery, input, finalFilter, - sortCriterion, + sortCriteria, scrollId, keepAlive, querySize), diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java index c8525f829d2066..1cd738656d972f 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java @@ -65,7 +65,7 @@ public Map docCountPerEntity( * @param input the search input text * @param postFilters the request map with fields and values as filters to be applied to search * hits - * @param sortCriterion {@link SortCriterion} to be applied to search results + * @param sortCriteria list of {@link SortCriterion} to be applied to search results * @param from index to start the search from * @param size the number of search hits to return * @return a {@link SearchResult} that contains a list of matched documents and related search @@ -77,7 +77,7 @@ public SearchResult search( @Nonnull List entityNames, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, + List sortCriteria, int from, int size) { List entitiesToSearch = getEntitiesToSearch(opContext, entityNames, size); @@ -87,7 +87,7 @@ public SearchResult search( } SearchResult result = _cachingEntitySearchService.search( - opContext, entitiesToSearch, input, postFilters, sortCriterion, from, size, null); + opContext, entitiesToSearch, input, postFilters, sortCriteria, 
from, size, null); try { return result @@ -105,11 +105,11 @@ public SearchResult searchAcrossEntities( @Nonnull List entities, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, + List sortCriteria, int from, int size) { return searchAcrossEntities( - opContext, entities, input, postFilters, sortCriterion, from, size, null); + opContext, entities, input, postFilters, sortCriteria, from, size, null); } /** @@ -120,7 +120,7 @@ public SearchResult searchAcrossEntities( * @param input the search input text * @param postFilters the request map with fields and values as filters to be applied to search * hits - * @param sortCriterion {@link SortCriterion} to be applied to search results + * @param sortCriteria list of {@link SortCriterion} to be applied to search results * @param from index to start the search from * @param size the number of search hits to return * @param facets list of facets we want aggregations for @@ -133,14 +133,14 @@ public SearchResult searchAcrossEntities( @Nonnull List entities, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, + List sortCriteria, int from, int size, @Nullable List facets) { log.debug( String.format( "Searching Search documents entities: %s, input: %s, postFilters: %s, sortCriterion: %s, from: %s, size: %s", - entities, input, postFilters, sortCriterion, from, size)); + entities, input, postFilters, sortCriteria, from, size)); // DEPRECATED // This is the legacy version of `_entityType`-- it operates as a special case and does not // support ORs, Unions, etc. 
@@ -160,7 +160,7 @@ public SearchResult searchAcrossEntities( } SearchResult result = _cachingEntitySearchService.search( - opContext, nonEmptyEntities, input, postFilters, sortCriterion, from, size, facets); + opContext, nonEmptyEntities, input, postFilters, sortCriteria, from, size, facets); if (facets == null || facets.contains("entity") || facets.contains("_entityType")) { Optional entityTypeAgg = result.getMetadata().getAggregations().stream() @@ -238,7 +238,7 @@ public List getEntitiesToSearch( * @param input the search input text * @param postFilters the request map with fields and values as filters to be applied to search * hits - * @param sortCriterion {@link SortCriterion} to be applied to search results + * @param sortCriteria list of {@link SortCriterion} to be applied to search results * @param scrollId opaque scroll identifier for passing to search backend * @param size the number of search hits to return * @return a {@link ScrollResult} that contains a list of matched documents and related search @@ -250,21 +250,21 @@ public ScrollResult scrollAcrossEntities( @Nonnull List entities, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, + List sortCriteria, @Nullable String scrollId, @Nullable String keepAlive, int size) { log.debug( String.format( - "Searching Search documents entities: %s, input: %s, postFilters: %s, sortCriterion: %s, from: %s, size: %s", - entities, input, postFilters, sortCriterion, scrollId, size)); + "Searching Search documents entities: %s, input: %s, postFilters: %s, sortCriteria: %s, from: %s, size: %s", + entities, input, postFilters, sortCriteria, scrollId, size)); List entitiesToSearch = getEntitiesToSearch(opContext, entities, size); if (entitiesToSearch.isEmpty()) { // No indices with non-zero entries: skip querying and return empty result return getEmptyScrollResult(size); } return _cachingEntitySearchService.scroll( - opContext, entitiesToSearch, input, postFilters, 
sortCriterion, scrollId, keepAlive, size); + opContext, entitiesToSearch, input, postFilters, sortCriteria, scrollId, keepAlive, size); } private static SearchResult getEmptySearchResult(int from, int size) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java index a5ef1c8fa58b12..8d7548e0ba90a1 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java @@ -43,7 +43,7 @@ public ScrollResult scroll( entities, "*", filters, - urnSort, + List.of(urnSort), scrollId, null, count); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/cache/CachedSearchResult.java b/metadata-io/src/main/java/com/linkedin/metadata/search/cache/CachedSearchResult.java new file mode 100644 index 00000000000000..7fa93be62fd97f --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/cache/CachedSearchResult.java @@ -0,0 +1,23 @@ +package com.linkedin.metadata.search.cache; + +import static com.datahub.util.RecordUtils.*; +import static com.linkedin.metadata.search.utils.GZIPUtil.*; + +import com.linkedin.metadata.search.SearchResult; +import java.io.Serializable; +import lombok.Data; + +@Data +public class CachedSearchResult implements Serializable { + private final byte[] searchResult; + private final long timestamp; + + public CachedSearchResult(SearchResult lineageResult, long timestamp) { + this.searchResult = gzipCompress(toJsonString(lineageResult)); + this.timestamp = timestamp; + } + + public SearchResult getSearchResult() { + return toRecordTemplate(SearchResult.class, gzipDecompress(searchResult)); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java 
b/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java index 5db427fa901488..cb062e0e3f4483 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java @@ -20,6 +20,7 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; +import org.apache.commons.collections.CollectionUtils; import org.javatuples.Septet; import org.javatuples.Sextet; import org.springframework.cache.Cache; @@ -47,7 +48,7 @@ public class CachingEntitySearchService { * @param entityNames the names of the entity to search * @param query the search query * @param filters the filters to include - * @param sortCriterion the sort criterion + * @param sortCriteria the sort criteria * @param from the start offset * @param size the count * @param facets list of facets we want aggregations for @@ -58,12 +59,12 @@ public SearchResult search( @Nonnull List entityNames, @Nonnull String query, @Nullable Filter filters, - @Nullable SortCriterion sortCriterion, + List sortCriteria, int from, int size, @Nullable List facets) { return getCachedSearchResults( - opContext, entityNames, query, filters, sortCriterion, from, size, facets); + opContext, entityNames, query, filters, sortCriteria, from, size, facets); } /** @@ -115,7 +116,7 @@ public BrowseResult browse( * @param entities the names of the entities to search * @param query the search query * @param filters the filters to include - * @param sortCriterion the sort criterion + * @param sortCriteria the sort criteria * @param scrollId opaque scroll identifier for a scroll request * @param keepAlive the string representation of how long to keep point in time alive * @param size the count @@ -126,12 +127,12 @@ public ScrollResult scroll( @Nonnull List entities, @Nonnull String query, @Nullable Filter filters, - @Nullable 
SortCriterion sortCriterion, + List sortCriteria, @Nullable String scrollId, @Nullable String keepAlive, int size) { return getCachedScrollResults( - opContext, entities, query, filters, sortCriterion, scrollId, keepAlive, size); + opContext, entities, query, filters, sortCriteria, scrollId, keepAlive, size); } /** @@ -145,7 +146,7 @@ public SearchResult getCachedSearchResults( @Nonnull List entityNames, @Nonnull String query, @Nullable Filter filters, - @Nullable SortCriterion sortCriterion, + List sortCriteria, int from, int size, @Nullable List facets) { @@ -158,7 +159,7 @@ public SearchResult getCachedSearchResults( entityNames, query, filters, - sortCriterion, + sortCriteria, querySize.getFrom(), querySize.getSize(), facets), @@ -168,7 +169,7 @@ public SearchResult getCachedSearchResults( entityNames, query, filters != null ? toJsonString(filters) : null, - sortCriterion != null ? toJsonString(sortCriterion) : null, + CollectionUtils.isNotEmpty(sortCriteria) ? toJsonString(sortCriteria) : null, facets, querySize), enableCache) @@ -269,7 +270,7 @@ public ScrollResult getCachedScrollResults( @Nonnull List entities, @Nonnull String query, @Nullable Filter filters, - @Nullable SortCriterion sortCriterion, + List sortCriteria, @Nullable String scrollId, @Nullable String keepAlive, int size) { @@ -291,7 +292,7 @@ public ScrollResult getCachedScrollResults( entities, query, filters != null ? toJsonString(filters) : null, - sortCriterion != null ? toJsonString(sortCriterion) : null, + CollectionUtils.isNotEmpty(sortCriteria) ? 
toJsonString(sortCriteria) : null, scrollId, size); String json = cache.get(cacheKey, String.class); @@ -305,7 +306,7 @@ public ScrollResult getCachedScrollResults( entities, query, filters, - sortCriterion, + sortCriteria, scrollId, keepAlive, size, @@ -321,7 +322,7 @@ public ScrollResult getCachedScrollResults( entities, query, filters, - sortCriterion, + sortCriteria, scrollId, keepAlive, size, @@ -337,12 +338,12 @@ private SearchResult getRawSearchResults( final List entityNames, final String input, final Filter filters, - final SortCriterion sortCriterion, + final List sortCriteria, final int start, final int count, @Nullable final List facets) { return entitySearchService.search( - opContext, entityNames, input, filters, sortCriterion, start, count, facets); + opContext, entityNames, input, filters, sortCriteria, start, count, facets); } /** Executes the expensive autocomplete query using the {@link EntitySearchService} */ @@ -373,17 +374,17 @@ private ScrollResult getRawScrollResults( final List entities, final String input, final Filter filters, - final SortCriterion sortCriterion, + final List sortCriteria, @Nullable final String scrollId, @Nullable final String keepAlive, final int count, final boolean fulltext) { if (fulltext) { return entitySearchService.fullTextScroll( - opContext, entities, input, filters, sortCriterion, scrollId, keepAlive, count); + opContext, entities, input, filters, sortCriteria, scrollId, keepAlive, count); } else { return entitySearchService.structuredScroll( - opContext, entities, input, filters, sortCriterion, scrollId, keepAlive, count); + opContext, entities, input, filters, sortCriteria, scrollId, keepAlive, count); } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java index 578c34611a75aa..4d5fe8d0b8e607 100644 --- 
a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java @@ -142,10 +142,10 @@ public SearchResult search( @Nonnull List entityNames, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, + List sortCriteria, int from, int size) { - return search(opContext, entityNames, input, postFilters, sortCriterion, from, size, null); + return search(opContext, entityNames, input, postFilters, sortCriteria, from, size, null); } @Nonnull @@ -154,14 +154,14 @@ public SearchResult search( @Nonnull List entityNames, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, + List sortCriteria, int from, int size, @Nullable List facets) { log.debug( String.format( - "Searching FullText Search documents entityName: %s, input: %s, postFilters: %s, sortCriterion: %s, from: %s, size: %s", - entityNames, input, postFilters, sortCriterion, from, size)); + "Searching FullText Search documents entityName: %s, input: %s, postFilters: %s, sortCriteria: %s, from: %s, size: %s", + entityNames, input, postFilters, sortCriteria, from, size)); return esSearchDAO.search( opContext.withSearchFlags( @@ -169,7 +169,7 @@ public SearchResult search( entityNames, input, postFilters, - sortCriterion, + sortCriteria, from, size, facets); @@ -181,20 +181,20 @@ public SearchResult filter( @Nonnull OperationContext opContext, @Nonnull String entityName, @Nullable Filter filters, - @Nullable SortCriterion sortCriterion, + List sortCriteria, int from, int size) { log.debug( String.format( - "Filtering Search documents entityName: %s, filters: %s, sortCriterion: %s, from: %s, size: %s", - entityName, filters, sortCriterion, from, size)); + "Filtering Search documents entityName: %s, filters: %s, sortCriteria: %s, from: %s, size: %s", + entityName, filters, sortCriteria, from, size)); return 
esSearchDAO.filter( opContext.withSearchFlags( flags -> applyDefaultSearchFlags(flags, null, DEFAULT_SERVICE_SEARCH_FLAGS)), entityName, filters, - sortCriterion, + sortCriteria, from, size); } @@ -330,14 +330,14 @@ public ScrollResult fullTextScroll( @Nonnull List entities, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, + List sortCriteria, @Nullable String scrollId, @Nullable String keepAlive, int size) { log.debug( String.format( - "Scrolling Structured Search documents entities: %s, input: %s, postFilters: %s, sortCriterion: %s, scrollId: %s, size: %s", - entities, input, postFilters, sortCriterion, scrollId, size)); + "Scrolling Structured Search documents entities: %s, input: %s, postFilters: %s, sortCriteria: %s, scrollId: %s, size: %s", + entities, input, postFilters, sortCriteria, scrollId, size)); return esSearchDAO.scroll( opContext.withSearchFlags( @@ -347,7 +347,7 @@ public ScrollResult fullTextScroll( entities, input, postFilters, - sortCriterion, + sortCriteria, scrollId, keepAlive, size); @@ -360,14 +360,14 @@ public ScrollResult structuredScroll( @Nonnull List entities, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, + List sortCriteria, @Nullable String scrollId, @Nullable String keepAlive, int size) { log.debug( String.format( - "Scrolling FullText Search documents entities: %s, input: %s, postFilters: %s, sortCriterion: %s, scrollId: %s, size: %s", - entities, input, postFilters, sortCriterion, scrollId, size)); + "Scrolling FullText Search documents entities: %s, input: %s, postFilters: %s, sortCriteria: %s, scrollId: %s, size: %s", + entities, input, postFilters, sortCriteria, scrollId, size)); return esSearchDAO.scroll( opContext.withSearchFlags( @@ -377,7 +377,7 @@ public ScrollResult structuredScroll( entities, input, postFilters, - sortCriterion, + sortCriteria, scrollId, keepAlive, size); @@ -400,7 +400,7 @@ public ExplainResponse explain( 
@Nonnull String documentId, @Nonnull String entityName, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, + List sortCriteria, @Nullable String scrollId, @Nullable String keepAlive, int size, @@ -413,7 +413,7 @@ public ExplainResponse explain( documentId, entityName, postFilters, - sortCriterion, + sortCriteria, scrollId, keepAlive, size, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java index b537a396340277..cb342794aff585 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java @@ -228,7 +228,7 @@ private ScrollResult executeAndExtract( * @param input the search input text * @param postFilters the request map with fields and values as filters to be applied to search * hits - * @param sortCriterion {@link SortCriterion} to be applied to search results + * @param sortCriteria list of {@link SortCriterion} to be applied to search results * @param from index to start the search from * @param size the number of search hits to return * @param facets list of facets we want aggregations for @@ -241,7 +241,7 @@ public SearchResult search( @Nonnull List entityNames, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, + List sortCriteria, int from, int size, @Nullable List facets) { @@ -257,7 +257,7 @@ public SearchResult search( final SearchRequest searchRequest = SearchRequestHandler.getBuilder(entitySpecs, searchConfiguration, customSearchConfiguration) .getSearchRequest( - opContext, finalInput, transformedFilters, sortCriterion, from, size, facets); + opContext, finalInput, transformedFilters, sortCriteria, from, size, facets); searchRequest.indices( 
entityNames.stream().map(indexConvention::getEntityIndexName).toArray(String[]::new)); searchRequestTimer.stop(); @@ -270,7 +270,7 @@ public SearchResult search( * * @param filters the request map with fields and values to be applied as filters to the search * query - * @param sortCriterion {@link SortCriterion} to be applied to search results + * @param sortCriteria list of {@link SortCriterion} to be applied to search results * @param from index to start the search from * @param size number of search hits to return * @return a {@link SearchResult} that contains a list of filtered documents and related search @@ -281,7 +281,7 @@ public SearchResult filter( @Nonnull OperationContext opContext, @Nonnull String entityName, @Nullable Filter filters, - @Nullable SortCriterion sortCriterion, + List sortCriteria, int from, int size) { IndexConvention indexConvention = opContext.getSearchContext().getIndexConvention(); @@ -289,7 +289,7 @@ public SearchResult filter( Filter transformedFilters = transformFilterForEntities(filters, indexConvention); final SearchRequest searchRequest = SearchRequestHandler.getBuilder(entitySpec, searchConfiguration, customSearchConfiguration) - .getFilterRequest(opContext, transformedFilters, sortCriterion, from, size); + .getFilterRequest(opContext, transformedFilters, sortCriteria, from, size); searchRequest.indices(indexConvention.getIndexName(entitySpec)); return executeAndExtract( @@ -401,7 +401,7 @@ public Map aggregateByValue( * @param input the search input text * @param postFilters the request map with fields and values as filters to be applied to search * hits - * @param sortCriterion {@link SortCriterion} to be applied to search results + * @param sortCriteria list of {@link SortCriterion} to be applied to search results * @param scrollId opaque scroll Id to convert to a PIT ID and Sort array to pass to ElasticSearch * @param keepAlive string representation of the time to keep a point in time alive * @param size the number of 
search hits to return @@ -414,7 +414,7 @@ public ScrollResult scroll( @Nonnull List entities, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, + List sortCriteria, @Nullable String scrollId, @Nullable String keepAlive, int size) { @@ -439,7 +439,7 @@ public ScrollResult scroll( transformedFilters, entitySpecs, finalInput, - sortCriterion, + sortCriteria, null); // PIT specifies indices in creation so it doesn't support specifying indices on the request, so @@ -462,7 +462,7 @@ private SearchRequest getScrollRequest( @Nullable Filter postFilters, List entitySpecs, String finalInput, - @Nullable SortCriterion sortCriterion, + List sortCriteria, @Nullable List facets) { String pitId = null; Object[] sort = null; @@ -483,15 +483,7 @@ private SearchRequest getScrollRequest( return SearchRequestHandler.getBuilder( entitySpecs, searchConfiguration, customSearchConfiguration) .getSearchRequest( - opContext, - finalInput, - postFilters, - sortCriterion, - sort, - pitId, - keepAlive, - size, - facets); + opContext, finalInput, postFilters, sortCriteria, sort, pitId, keepAlive, size, facets); } public Optional raw( @@ -544,7 +536,7 @@ public ExplainResponse explain( @Nonnull String documentId, @Nonnull String entityName, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, + List sortCriteria, @Nullable String scrollId, @Nullable String keepAlive, int size, @@ -564,7 +556,7 @@ public ExplainResponse explain( transformedFilters, Collections.singletonList(entitySpec), finalQuery, - sortCriterion, + sortCriteria, facets); ; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index 66ad1e3be363f3..6e4210de6ef80a 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ 
b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -187,7 +187,7 @@ public SearchRequest getSearchRequest( @Nonnull OperationContext opContext, @Nonnull String input, @Nullable Filter filter, - @Nullable SortCriterion sortCriterion, + List sortCriteria, int from, int size, @Nullable List facets) { @@ -213,7 +213,7 @@ public SearchRequest getSearchRequest( if (Boolean.FALSE.equals(searchFlags.isSkipHighlighting())) { searchSourceBuilder.highlighter(highlights); } - ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, entitySpecs); + ESUtils.buildSortOrder(searchSourceBuilder, sortCriteria, entitySpecs); if (Boolean.TRUE.equals(searchFlags.isGetSuggestions())) { ESUtils.buildNameSuggestions(searchSourceBuilder, input); @@ -243,7 +243,7 @@ public SearchRequest getSearchRequest( @Nonnull OperationContext opContext, @Nonnull String input, @Nullable Filter filter, - @Nullable SortCriterion sortCriterion, + List sortCriteria, @Nullable Object[] sort, @Nullable String pitId, @Nullable String keepAlive, @@ -272,7 +272,7 @@ public SearchRequest getSearchRequest( if (Boolean.FALSE.equals(searchFlags.isSkipHighlighting())) { searchSourceBuilder.highlighter(highlights); } - ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, entitySpecs); + ESUtils.buildSortOrder(searchSourceBuilder, sortCriteria, entitySpecs); searchRequest.source(searchSourceBuilder); log.debug("Search request is: " + searchRequest); searchRequest.indicesOptions(null); @@ -285,7 +285,7 @@ public SearchRequest getSearchRequest( * to be applied to search results. 
* * @param filters {@link Filter} list of conditions with fields and values - * @param sortCriterion {@link SortCriterion} to be applied to the search results + * @param sortCriteria list of {@link SortCriterion} to be applied to the search results * @param from index to start the search from * @param size the number of search hits to return * @return {@link SearchRequest} that contains the filtered query @@ -294,7 +294,7 @@ public SearchRequest getSearchRequest( public SearchRequest getFilterRequest( @Nonnull OperationContext opContext, @Nullable Filter filters, - @Nullable SortCriterion sortCriterion, + List sortCriteria, int from, int size) { SearchRequest searchRequest = new SearchRequest(); @@ -303,7 +303,7 @@ public SearchRequest getFilterRequest( final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.query(filterQuery); searchSourceBuilder.from(from).size(size); - ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, entitySpecs); + ESUtils.buildSortOrder(searchSourceBuilder, sortCriteria, entitySpecs); searchRequest.source(searchSourceBuilder); return searchRequest; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java index e299dde62b1841..c4060bbb0928b2 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java @@ -299,17 +299,14 @@ public static String getElasticTypeForFieldType(SearchableAnnotation.FieldType f * * @param searchSourceBuilder {@link SearchSourceBuilder} that needs to be populated with sort * order - * @param sortCriterion {@link SortCriterion} to be applied to the search results + * @param sortCriteria list of {@link SortCriterion} to be applied to the search results */ public static void buildSortOrder( @Nonnull SearchSourceBuilder searchSourceBuilder, - @Nullable SortCriterion 
sortCriterion, + List sortCriteria, List entitySpecs) { buildSortOrder( - searchSourceBuilder, - sortCriterion == null ? List.of() : List.of(sortCriterion), - entitySpecs, - true); + searchSourceBuilder, sortCriteria == null ? List.of() : sortCriteria, entitySpecs, true); } /** @@ -321,20 +318,20 @@ public static void buildSortOrder( */ public static void buildSortOrder( @Nonnull SearchSourceBuilder searchSourceBuilder, - @Nonnull List sortCriterion, + @Nonnull List sortCriteria, List entitySpecs, boolean enableDefaultSort) { - if (sortCriterion.isEmpty() && enableDefaultSort) { + if (sortCriteria.isEmpty() && enableDefaultSort) { searchSourceBuilder.sort(new ScoreSortBuilder().order(SortOrder.DESC)); } else { - for (SortCriterion sortCriteria : sortCriterion) { + for (SortCriterion sortCriterion : sortCriteria) { Optional fieldTypeForDefault = Optional.empty(); for (EntitySpec entitySpec : entitySpecs) { List fieldSpecs = entitySpec.getSearchableFieldSpecs(); for (SearchableFieldSpec fieldSpec : fieldSpecs) { SearchableAnnotation annotation = fieldSpec.getSearchableAnnotation(); - if (annotation.getFieldName().equals(sortCriteria.getField()) - || annotation.getFieldNameAliases().contains(sortCriteria.getField())) { + if (annotation.getFieldName().equals(sortCriterion.getField()) + || annotation.getFieldNameAliases().contains(sortCriterion.getField())) { fieldTypeForDefault = Optional.of(fieldSpec.getSearchableAnnotation().getFieldType()); break; } @@ -346,15 +343,15 @@ public static void buildSortOrder( if (fieldTypeForDefault.isEmpty() && !entitySpecs.isEmpty()) { log.warn( "Sort criterion field " - + sortCriteria.getField() + + sortCriterion.getField() + " was not found in any entity spec to be searched"); } final SortOrder esSortOrder = - (sortCriteria.getOrder() == com.linkedin.metadata.query.filter.SortOrder.ASCENDING) + (sortCriterion.getOrder() == com.linkedin.metadata.query.filter.SortOrder.ASCENDING) ? 
SortOrder.ASC : SortOrder.DESC; FieldSortBuilder sortBuilder = - new FieldSortBuilder(sortCriteria.getField()).order(esSortOrder); + new FieldSortBuilder(sortCriterion.getField()).order(esSortOrder); if (fieldTypeForDefault.isPresent()) { String esFieldtype = getElasticTypeForFieldType(fieldTypeForDefault.get()); if (esFieldtype != null) { @@ -365,8 +362,8 @@ public static void buildSortOrder( } } if (enableDefaultSort - && (sortCriterion.isEmpty() - || sortCriterion.stream() + && (sortCriteria.isEmpty() + || sortCriteria.stream() .noneMatch(c -> c.getField().equals(DEFAULT_SEARCH_RESULTS_SORT_BY_FIELD)))) { searchSourceBuilder.sort( new FieldSortBuilder(DEFAULT_SEARCH_RESULTS_SORT_BY_FIELD).order(SortOrder.ASC)); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java index dff0a99a142b73..2ab9e17f281637 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java @@ -80,6 +80,7 @@ public class UpdateIndicesService implements SearchIndicesService { private final SystemMetadataService _systemMetadataService; private final SearchDocumentTransformer _searchDocumentTransformer; private final EntityIndexBuilders _entityIndexBuilders; + @Nonnull private final String idHashAlgo; @Value("${featureFlags.graphServiceDiffModeEnabled:true}") private boolean _graphDiffMode; @@ -117,13 +118,15 @@ public UpdateIndicesService( TimeseriesAspectService timeseriesAspectService, SystemMetadataService systemMetadataService, SearchDocumentTransformer searchDocumentTransformer, - EntityIndexBuilders entityIndexBuilders) { + EntityIndexBuilders entityIndexBuilders, + @Nonnull String idHashAlgo) { _graphService = graphService; _entitySearchService = entitySearchService; _timeseriesAspectService = timeseriesAspectService; _systemMetadataService = 
systemMetadataService; _searchDocumentTransformer = searchDocumentTransformer; _entityIndexBuilders = entityIndexBuilders; + this.idHashAlgo = idHashAlgo; } @Override @@ -601,7 +604,9 @@ private void updateTimeseriesFields( SystemMetadata systemMetadata) { Map documents; try { - documents = TimeseriesAspectTransformer.transform(urn, aspect, aspectSpec, systemMetadata); + documents = + TimeseriesAspectTransformer.transform( + urn, aspect, aspectSpec, systemMetadata, idHashAlgo); } catch (JsonProcessingException e) { log.error("Failed to generate timeseries document from aspect: {}", e.toString()); return; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ESSystemMetadataDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ESSystemMetadataDAO.java index cf1674ac004809..a5c2fb04b5ce39 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ESSystemMetadataDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ESSystemMetadataDAO.java @@ -54,10 +54,7 @@ public Optional getTaskStatus(@Nonnull String nodeId, long task try { return client.tasks().get(taskRequest, RequestOptions.DEFAULT); } catch (IOException e) { - log.error( - String.format( - "ERROR: Failed to get task status for %s:%d. 
See stacktrace for a more detailed error:", - nodeId, taskId)); + log.error("ERROR: Failed to get task status: ", e); e.printStackTrace(); } return Optional.empty(); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java index 57002a3bfc59d5..fe79ba75cb1d14 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java @@ -52,6 +52,7 @@ public class ElasticSearchSystemMetadataService private final IndexConvention _indexConvention; private final ESSystemMetadataDAO _esDAO; private final ESIndexBuilder _indexBuilder; + @Nonnull private final String elasticIdHashAlgo; private static final String DOC_DELIMETER = "--"; public static final String INDEX_NAME = "system_metadata_service_v1"; @@ -86,10 +87,9 @@ private String toDocument(SystemMetadata systemMetadata, String urn, String aspe private String toDocId(@Nonnull final String urn, @Nonnull final String aspect) { String rawDocId = urn + DOC_DELIMETER + aspect; - try { byte[] bytesOfRawDocID = rawDocId.getBytes(StandardCharsets.UTF_8); - MessageDigest md = MessageDigest.getInstance("MD5"); + MessageDigest md = MessageDigest.getInstance(elasticIdHashAlgo); byte[] thedigest = md.digest(bytesOfRawDocID); return Base64.getEncoder().encodeToString(thedigest); } catch (NoSuchAlgorithmException e) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java index ce4ff53eba91b9..9b4d373d25d8fb 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java +++ 
b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java @@ -551,7 +551,7 @@ public TimeseriesScrollResult scrollAspects( @Nonnull String entityName, @Nonnull String aspectName, @Nullable Filter filter, - @Nonnull List sortCriterion, + @Nonnull List sortCriteria, @Nullable String scrollId, int count, @Nullable Long startTimeMillis, @@ -592,7 +592,7 @@ public TimeseriesScrollResult scrollAspects( entityName, aspectName, filterQueryBuilder, - sortCriterion, + sortCriteria, scrollId, count); int totalCount = (int) response.getHits().getTotalHits().value; @@ -615,7 +615,7 @@ private SearchResponse executeScrollSearchQuery( @Nonnull final String entityName, @Nonnull final String aspectName, @Nonnull final QueryBuilder query, - @Nonnull List sortCriterion, + @Nonnull List sortCriteria, @Nullable String scrollId, final int count) { @@ -631,7 +631,7 @@ private SearchResponse executeScrollSearchQuery( searchSourceBuilder.size(count); searchSourceBuilder.query(query); - ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, List.of(), false); + ESUtils.buildSortOrder(searchSourceBuilder, sortCriteria, List.of(), false); searchRequest.source(searchSourceBuilder); ESUtils.setSearchAfter(searchSourceBuilder, sort, null, null); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/transformer/TimeseriesAspectTransformer.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/transformer/TimeseriesAspectTransformer.java index c0f66acaaca5af..c353e601a31b70 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/transformer/TimeseriesAspectTransformer.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/transformer/TimeseriesAspectTransformer.java @@ -54,7 +54,8 @@ public static Map transform( @Nonnull final Urn urn, @Nonnull final RecordTemplate timeseriesAspect, @Nonnull final AspectSpec aspectSpec, - @Nullable final SystemMetadata systemMetadata) + @Nullable 
final SystemMetadata systemMetadata, + @Nonnull final String idHashAlgo) throws JsonProcessingException { ObjectNode commonDocument = getCommonDocument(urn, timeseriesAspect, systemMetadata); Map finalDocuments = new HashMap<>(); @@ -74,7 +75,7 @@ public static Map transform( final Map> timeseriesFieldValueMap = FieldExtractor.extractFields(timeseriesAspect, aspectSpec.getTimeseriesFieldSpecs()); timeseriesFieldValueMap.forEach((k, v) -> setTimeseriesField(document, k, v)); - finalDocuments.put(getDocId(document, null), document); + finalDocuments.put(getDocId(document, null, idHashAlgo), document); // Create new rows for the member collection fields. final Map> timeseriesFieldCollectionValueMap = @@ -83,7 +84,7 @@ public static Map transform( timeseriesFieldCollectionValueMap.forEach( (key, values) -> finalDocuments.putAll( - getTimeseriesFieldCollectionDocuments(key, values, commonDocument))); + getTimeseriesFieldCollectionDocuments(key, values, commonDocument, idHashAlgo))); return finalDocuments; } @@ -216,12 +217,13 @@ private static void setTimeseriesField( private static Map getTimeseriesFieldCollectionDocuments( final TimeseriesFieldCollectionSpec fieldSpec, final List values, - final ObjectNode commonDocument) { + final ObjectNode commonDocument, + @Nonnull final String idHashAlgo) { return values.stream() .map(value -> getTimeseriesFieldCollectionDocument(fieldSpec, value, commonDocument)) .collect( Collectors.toMap( - keyDocPair -> getDocId(keyDocPair.getSecond(), keyDocPair.getFirst()), + keyDocPair -> getDocId(keyDocPair.getSecond(), keyDocPair.getFirst(), idHashAlgo), Pair::getSecond)); } @@ -257,7 +259,9 @@ private static Pair getTimeseriesFieldCollectionDocument( finalDocument); } - private static String getDocId(@Nonnull JsonNode document, String collectionId) { + private static String getDocId( + @Nonnull JsonNode document, String collectionId, @Nonnull String idHashAlgo) + throws IllegalArgumentException { String docId = 
document.get(MappingsBuilder.TIMESTAMP_MILLIS_FIELD).toString(); JsonNode eventGranularity = document.get(MappingsBuilder.EVENT_GRANULARITY); if (eventGranularity != null) { @@ -276,6 +280,11 @@ private static String getDocId(@Nonnull JsonNode document, String collectionId) docId += partitionSpec.toString(); } - return DigestUtils.md5Hex(docId); + if (idHashAlgo.equalsIgnoreCase("SHA-256")) { + return DigestUtils.sha256Hex(docId); + } else if (idHashAlgo.equalsIgnoreCase("MD5")) { + return DigestUtils.md5Hex(docId); + } + throw new IllegalArgumentException("Hash function not handled !"); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/AspectGenerationUtils.java b/metadata-io/src/test/java/com/linkedin/metadata/AspectGenerationUtils.java index b9a1817f476fba..346a1eef845923 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/AspectGenerationUtils.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/AspectGenerationUtils.java @@ -1,5 +1,7 @@ package com.linkedin.metadata; +import static com.linkedin.metadata.utils.SystemMetadataUtils.createDefaultSystemMetadata; + import com.linkedin.chart.ChartInfo; import com.linkedin.common.AuditStamp; import com.linkedin.common.ChangeAuditStamps; @@ -48,11 +50,10 @@ public static SystemMetadata createSystemMetadata( @Nonnull String runId, @Nonnull String lastRunId, @Nullable String version) { - SystemMetadata metadata = new SystemMetadata(); - metadata.setLastObserved(lastObserved); - metadata.setRunId(runId); + SystemMetadata metadata = createDefaultSystemMetadata(runId); metadata.setLastRunId(lastRunId); metadata.setVersion(version, SetMode.IGNORE_NULL); + metadata.setLastObserved(lastObserved); return metadata; } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java index acdbd7855f7b0b..53f5ebfe59728e 100644 --- 
a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java @@ -602,8 +602,9 @@ public void testReingestLineageAspect() throws Exception { GenericAspect aspect = GenericRecordUtils.serializeAspect(pairToIngest.get(0).getSecond()); + SystemMetadata initialSystemMetadata = AspectGenerationUtils.createSystemMetadata(1); initialChangeLog.setAspect(aspect); - initialChangeLog.setSystemMetadata(AspectGenerationUtils.createSystemMetadata(1)); + initialChangeLog.setSystemMetadata(initialSystemMetadata); initialChangeLog.setEntityKeyAspect( GenericRecordUtils.serializeAspect( EntityKeyUtils.convertUrnToEntityKey( @@ -620,7 +621,7 @@ public void testReingestLineageAspect() throws Exception { restateChangeLog.setSystemMetadata(AspectGenerationUtils.createSystemMetadata(1)); restateChangeLog.setPreviousAspectValue(aspect); restateChangeLog.setPreviousSystemMetadata( - simulatePullFromDB(AspectGenerationUtils.createSystemMetadata(1), SystemMetadata.class)); + simulatePullFromDB(initialSystemMetadata, SystemMetadata.class)); restateChangeLog.setEntityKeyAspect( GenericRecordUtils.serializeAspect( EntityKeyUtils.convertUrnToEntityKey( diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java index b430313f5904b3..5d9a5079f2a3b3 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java @@ -2196,7 +2196,7 @@ public void testHighlyConnectedGraphWalk() throws Exception { relationships.stream() .flatMap(relationship -> relationship.getDegrees().stream()) .reduce(0, Math::max); - assertTrue(maxDegree > 1); + assertTrue(maxDegree >= 1); EntityLineageResult lineageResultMulti = getGraphService(true) diff --git 
a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java index 06f1369ff0670c..d1a51b1d69b2c3 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java @@ -62,7 +62,7 @@ public abstract class SearchGraphServiceTestBase extends GraphServiceTestBase { @Nonnull protected abstract ESIndexBuilder getIndexBuilder(); - private final IndexConvention _indexConvention = IndexConventionImpl.NO_PREFIX; + private final IndexConvention _indexConvention = IndexConventionImpl.noPrefix("MD5"); private final String _indexName = _indexConvention.getIndexName(INDEX_NAME); private ElasticSearchGraphService _client; @@ -108,7 +108,8 @@ private ElasticSearchGraphService buildService(boolean enableMultiPathSearch) { _indexConvention, writeDAO, readDAO, - getIndexBuilder()); + getIndexBuilder(), + "MD5"); } @Override diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java index a9d84ae1f3aea1..99e4923885a41d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java @@ -122,7 +122,7 @@ public void setup() throws RemoteInvocationException, URISyntaxException { operationContext = TestOperationContexts.systemContextNoSearchAuthorization( new SnapshotEntityRegistry(new Snapshot()), - new IndexConventionImpl("lineage_search_service_test")) + new IndexConventionImpl("lineage_search_service_test", "MD5")) .asSession(RequestContext.TEST, Authorizer.EMPTY, TestOperationContexts.TEST_USER_AUTH); settingsBuilder = new SettingsBuilder(null); elasticSearchService = 
buildEntitySearchService(); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java index 445b71b2eaff62..5e30e01a8ea690 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java @@ -79,7 +79,7 @@ public void setup() throws RemoteInvocationException, URISyntaxException { operationContext = TestOperationContexts.systemContextNoSearchAuthorization( new SnapshotEntityRegistry(new Snapshot()), - new IndexConventionImpl("search_service_test")) + new IndexConventionImpl("search_service_test", "MD5")) .asSession(RequestContext.TEST, Authorizer.EMPTY, TestOperationContexts.TEST_USER_AUTH); settingsBuilder = new SettingsBuilder(null); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java index ab5e90f77c21aa..282a3d8e3ea6ae 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java @@ -62,7 +62,8 @@ public abstract class TestEntityTestBase extends AbstractTestNGSpringContextTest public void setup() { opContext = TestOperationContexts.systemContextNoSearchAuthorization( - new SnapshotEntityRegistry(new Snapshot()), new IndexConventionImpl("es_service_test")); + new SnapshotEntityRegistry(new Snapshot()), + new IndexConventionImpl("es_service_test", "MD5")); settingsBuilder = new SettingsBuilder(null); elasticSearchService = buildService(); elasticSearchService.reindexAll(Collections.emptySet()); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java 
index a3f6f39e6387c4..7e434bf93329e2 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java @@ -216,7 +216,7 @@ public void testGetSortOrder() { SearchSourceBuilder builder = new SearchSourceBuilder(); SortCriterion sortCriterion = new SortCriterion().setOrder(SortOrder.DESCENDING).setField(dateFieldName); - ESUtils.buildSortOrder(builder, sortCriterion, entitySpecs); + ESUtils.buildSortOrder(builder, Collections.singletonList(sortCriterion), entitySpecs); List> sorts = builder.sorts(); assertEquals(sorts.size(), 2); // sort by last modified and then by urn for (SortBuilder sort : sorts) { @@ -235,7 +235,7 @@ public void testGetSortOrder() { SearchSourceBuilder nameBuilder = new SearchSourceBuilder(); SortCriterion nameCriterion = new SortCriterion().setOrder(SortOrder.ASCENDING).setField(entityNameField); - ESUtils.buildSortOrder(nameBuilder, nameCriterion, entitySpecs); + ESUtils.buildSortOrder(nameBuilder, Collections.singletonList(nameCriterion), entitySpecs); sorts = nameBuilder.sorts(); assertEquals(sorts.size(), 2); for (SortBuilder sort : sorts) { @@ -1959,7 +1959,7 @@ public void testSortOrdering() { SEARCHABLE_ENTITIES, query, null, - criterion, + Collections.singletonList(criterion), 0, 100, null); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java index a0288d019644bd..8044515e3dc6a7 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java @@ -45,7 +45,7 @@ public void setup() throws RemoteInvocationException, URISyntaxException { mockClient = mock(RestHighLevelClient.class); opContext = TestOperationContexts.systemContextNoSearchAuthorization( - new 
IndexConventionImpl("es_browse_dao_test")); + new IndexConventionImpl("es_browse_dao_test", "MD5")); browseDAO = new ESBrowseDAO(mockClient, searchConfiguration, customSearchConfiguration); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/SystemMetadataServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/SystemMetadataServiceTestBase.java index d843191bed7413..1b9d8c57b4cad3 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/SystemMetadataServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/SystemMetadataServiceTestBase.java @@ -32,7 +32,7 @@ public abstract class SystemMetadataServiceTestBase extends AbstractTestNGSpring protected abstract ESIndexBuilder getIndexBuilder(); private final IndexConvention _indexConvention = - new IndexConventionImpl("es_system_metadata_service_test"); + new IndexConventionImpl("es_system_metadata_service_test", "MD5"); private ElasticSearchSystemMetadataService _client; @@ -54,7 +54,7 @@ private ElasticSearchSystemMetadataService buildService() { ESSystemMetadataDAO dao = new ESSystemMetadataDAO(getSearchClient(), _indexConvention, getBulkProcessor(), 1); return new ElasticSearchSystemMetadataService( - getBulkProcessor(), _indexConvention, dao, getIndexBuilder()); + getBulkProcessor(), _indexConvention, dao, getIndexBuilder(), "MD5"); } @Test diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java index 10c6f09cb8f8d6..414183c8882f9c 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java @@ -126,7 +126,7 @@ public void setup() throws RemoteInvocationException, URISyntaxException { 
opContext = TestOperationContexts.systemContextNoSearchAuthorization( - entityRegistry, new IndexConventionImpl("es_timeseries_aspect_service_test")); + entityRegistry, new IndexConventionImpl("es_timeseries_aspect_service_test", "MD5")); elasticSearchTimeseriesAspectService = buildService(); elasticSearchTimeseriesAspectService.reindexAll(Collections.emptySet()); @@ -152,7 +152,7 @@ private ElasticSearchTimeseriesAspectService buildService() { private void upsertDocument(TestEntityProfile dp, Urn urn) throws JsonProcessingException { Map documents = - TimeseriesAspectTransformer.transform(urn, dp, aspectSpec, null); + TimeseriesAspectTransformer.transform(urn, dp, aspectSpec, null, "MD5"); assertEquals(documents.size(), 3); documents.forEach( (key, value) -> diff --git a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java index 28a4a2b00cd6f1..6a95d16c254370 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java @@ -86,12 +86,12 @@ protected String longTailIndexPrefix() { @Bean(name = "sampleDataIndexConvention") protected IndexConvention indexConvention(@Qualifier("sampleDataPrefix") String prefix) { - return new IndexConventionImpl(prefix); + return new IndexConventionImpl(prefix, "MD5"); } @Bean(name = "longTailIndexConvention") protected IndexConvention longTailIndexConvention(@Qualifier("longTailPrefix") String prefix) { - return new IndexConventionImpl(prefix); + return new IndexConventionImpl(prefix, "MD5"); } @Bean(name = "sampleDataFixtureName") diff --git a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java 
b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java index e783c011de6d0e..33e04af83c0a3a 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java @@ -71,7 +71,7 @@ protected String indexPrefix() { @Bean(name = "searchLineageIndexConvention") protected IndexConvention indexConvention(@Qualifier("searchLineagePrefix") String prefix) { - return new IndexConventionImpl(prefix); + return new IndexConventionImpl(prefix, "MD5"); } @Bean(name = "searchLineageFixtureName") @@ -173,7 +173,8 @@ protected ElasticSearchGraphService graphService( new ESGraphWriteDAO(indexConvention, bulkProcessor, 1), new ESGraphQueryDAO( searchClient, lineageRegistry, indexConvention, getGraphQueryConfiguration()), - indexBuilder); + indexBuilder, + indexConvention.getIdHashAlgo()); graphService.reindexAll(Collections.emptySet()); return graphService; } diff --git a/metadata-jobs/mae-consumer-job/src/main/java/com/linkedin/metadata/kafka/MaeConsumerApplication.java b/metadata-jobs/mae-consumer-job/src/main/java/com/linkedin/metadata/kafka/MaeConsumerApplication.java index f6533a6ac1d8a9..617bc8e0b73030 100644 --- a/metadata-jobs/mae-consumer-job/src/main/java/com/linkedin/metadata/kafka/MaeConsumerApplication.java +++ b/metadata-jobs/mae-consumer-job/src/main/java/com/linkedin/metadata/kafka/MaeConsumerApplication.java @@ -18,8 +18,6 @@ "com.linkedin.metadata.service", "com.datahub.event", "com.linkedin.gms.factory.kafka", - "com.linkedin.gms.factory.kafka.common", - "com.linkedin.gms.factory.kafka.schemaregistry", "com.linkedin.metadata.boot.kafka", "com.linkedin.metadata.kafka", "com.linkedin.metadata.dao.producer", @@ -34,7 +32,10 @@ "com.linkedin.gms.factory.context", "com.linkedin.gms.factory.timeseries", "com.linkedin.gms.factory.assertion", - 
"com.linkedin.gms.factory.plugins" + "com.linkedin.gms.factory.plugins", + "com.linkedin.gms.factory.change", + "com.datahub.event.hook", + "com.linkedin.gms.factory.notifications" }, excludeFilters = { @ComponentScan.Filter( diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MCLKafkaListener.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MCLKafkaListener.java new file mode 100644 index 00000000000000..70b452722abc76 --- /dev/null +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MCLKafkaListener.java @@ -0,0 +1,103 @@ +package com.linkedin.metadata.kafka; + +import com.codahale.metrics.Histogram; +import com.codahale.metrics.MetricRegistry; +import com.codahale.metrics.Timer; +import com.linkedin.metadata.EventUtils; +import com.linkedin.metadata.kafka.hook.MetadataChangeLogHook; +import com.linkedin.metadata.utils.metrics.MetricUtils; +import com.linkedin.mxe.MetadataChangeLog; +import io.datahubproject.metadata.context.OperationContext; +import java.util.List; +import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; +import org.apache.avro.generic.GenericRecord; +import org.apache.kafka.clients.consumer.ConsumerRecord; + +@Slf4j +public class MCLKafkaListener { + private static final Histogram kafkaLagStats = + MetricUtils.get() + .histogram( + MetricRegistry.name( + "com.linkedin.metadata.kafka.MetadataChangeLogProcessor", "kafkaLag")); + + private final String consumerGroupId; + private final List hooks; + + public MCLKafkaListener( + OperationContext systemOperationContext, + String consumerGroup, + List hooks) { + this.consumerGroupId = consumerGroup; + this.hooks = hooks; + this.hooks.forEach(hook -> hook.init(systemOperationContext)); + + log.info( + "Enabled MCL Hooks - Group: {} Hooks: {}", + consumerGroup, + hooks.stream().map(hook -> hook.getClass().getSimpleName()).collect(Collectors.toList())); + } + + public void consume(final ConsumerRecord 
consumerRecord) { + try (Timer.Context i = MetricUtils.timer(this.getClass(), "consume").time()) { + kafkaLagStats.update(System.currentTimeMillis() - consumerRecord.timestamp()); + final GenericRecord record = consumerRecord.value(); + log.debug( + "Got MCL event consumer: {} key: {}, topic: {}, partition: {}, offset: {}, value size: {}, timestamp: {}", + consumerGroupId, + consumerRecord.key(), + consumerRecord.topic(), + consumerRecord.partition(), + consumerRecord.offset(), + consumerRecord.serializedValueSize(), + consumerRecord.timestamp()); + MetricUtils.counter(this.getClass(), consumerGroupId + "_received_mcl_count").inc(); + + MetadataChangeLog event; + try { + event = EventUtils.avroToPegasusMCL(record); + } catch (Exception e) { + MetricUtils.counter( + this.getClass(), consumerGroupId + "_avro_to_pegasus_conversion_failure") + .inc(); + log.error("Error deserializing message due to: ", e); + log.error("Message: {}", record.toString()); + return; + } + + log.info( + "Invoking MCL hooks for consumer: {} urn: {}, aspect name: {}, entity type: {}, change type: {}", + consumerGroupId, + event.getEntityUrn(), + event.hasAspectName() ? event.getAspectName() : null, + event.hasEntityType() ? event.getEntityType() : null, + event.hasChangeType() ? event.getChangeType() : null); + + // Here - plug in additional "custom processor hooks" + for (MetadataChangeLogHook hook : this.hooks) { + log.info( + "Invoking MCL hook {} for urn: {}", + hook.getClass().getSimpleName(), + event.getEntityUrn()); + try (Timer.Context ignored = + MetricUtils.timer(this.getClass(), hook.getClass().getSimpleName() + "_latency") + .time()) { + hook.invoke(event); + } catch (Exception e) { + // Just skip this hook and continue. - Note that this represents "at most once"// + // processing. 
+ MetricUtils.counter(this.getClass(), hook.getClass().getSimpleName() + "_failure").inc(); + log.error( + "Failed to execute MCL hook with name {}", hook.getClass().getCanonicalName(), e); + } + } + // TODO: Manually commit kafka offsets after full processing. + MetricUtils.counter(this.getClass(), consumerGroupId + "_consumed_mcl_count").inc(); + log.info( + "Successfully completed MCL hooks for consumer: {} urn: {}", + consumerGroupId, + event.getEntityUrn()); + } + } +} diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MCLKafkaListenerRegistrar.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MCLKafkaListenerRegistrar.java new file mode 100644 index 00000000000000..fb2880f617d301 --- /dev/null +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MCLKafkaListenerRegistrar.java @@ -0,0 +1,120 @@ +package com.linkedin.metadata.kafka; + +import com.linkedin.metadata.kafka.config.MetadataChangeLogProcessorCondition; +import com.linkedin.metadata.kafka.hook.MetadataChangeLogHook; +import com.linkedin.mxe.Topics; +import io.datahubproject.metadata.context.OperationContext; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import lombok.SneakyThrows; +import lombok.extern.slf4j.Slf4j; +import org.apache.avro.generic.GenericRecord; +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.springframework.beans.factory.InitializingBean; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Conditional; +import org.springframework.kafka.annotation.EnableKafka; +import org.springframework.kafka.config.KafkaListenerContainerFactory; +import org.springframework.kafka.config.KafkaListenerEndpoint; +import 
org.springframework.kafka.config.KafkaListenerEndpointRegistry; +import org.springframework.kafka.config.MethodKafkaListenerEndpoint; +import org.springframework.messaging.handler.annotation.support.DefaultMessageHandlerMethodFactory; +import org.springframework.stereotype.Component; + +@Slf4j +@EnableKafka +@Component +@Conditional(MetadataChangeLogProcessorCondition.class) +public class MCLKafkaListenerRegistrar implements InitializingBean { + + @Autowired + @Qualifier("systemOperationContext") + private OperationContext systemOperationContext; + + @Autowired private KafkaListenerEndpointRegistry kafkaListenerEndpointRegistry; + + @Autowired + @Qualifier("kafkaEventConsumer") + private KafkaListenerContainerFactory kafkaListenerContainerFactory; + + @Value("${METADATA_CHANGE_LOG_KAFKA_CONSUMER_GROUP_ID:generic-mae-consumer-job-client}") + private String consumerGroupBase; + + @Value("${METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME:" + Topics.METADATA_CHANGE_LOG_VERSIONED + "}") + private String mclVersionedTopicName; + + @Value( + "${METADATA_CHANGE_LOG_TIMESERIES_TOPIC_NAME:" + Topics.METADATA_CHANGE_LOG_TIMESERIES + "}") + private String mclTimeseriesTopicName; + + @Autowired private List metadataChangeLogHooks; + + @Override + public void afterPropertiesSet() { + Map> hookGroups = + getMetadataChangeLogHooks().stream() + .collect(Collectors.groupingBy(MetadataChangeLogHook::getConsumerGroupSuffix)); + + log.info( + "MetadataChangeLogProcessor Consumer Groups: {}", + hookGroups.keySet().stream().map(this::buildConsumerGroupName).collect(Collectors.toSet())); + + hookGroups.forEach( + (key, hooks) -> { + KafkaListenerEndpoint kafkaListenerEndpoint = + createListenerEndpoint( + buildConsumerGroupName(key), + List.of(mclVersionedTopicName, mclTimeseriesTopicName), + hooks); + registerMCLKafkaListener(kafkaListenerEndpoint, true); + }); + } + + public List getMetadataChangeLogHooks() { + return metadataChangeLogHooks.stream() + .filter(MetadataChangeLogHook::isEnabled) 
+ .sorted(Comparator.comparing(MetadataChangeLogHook::executionOrder)) + .toList(); + } + + @SneakyThrows + public void registerMCLKafkaListener( + KafkaListenerEndpoint kafkaListenerEndpoint, boolean startImmediately) { + kafkaListenerEndpointRegistry.registerListenerContainer( + kafkaListenerEndpoint, kafkaListenerContainerFactory, startImmediately); + } + + private KafkaListenerEndpoint createListenerEndpoint( + String consumerGroupId, List topics, List hooks) { + MethodKafkaListenerEndpoint kafkaListenerEndpoint = + new MethodKafkaListenerEndpoint<>(); + kafkaListenerEndpoint.setId(consumerGroupId); + kafkaListenerEndpoint.setGroupId(consumerGroupId); + kafkaListenerEndpoint.setAutoStartup(true); + kafkaListenerEndpoint.setTopics(topics.toArray(new String[topics.size()])); + kafkaListenerEndpoint.setMessageHandlerMethodFactory(new DefaultMessageHandlerMethodFactory()); + kafkaListenerEndpoint.setBean( + new MCLKafkaListener(systemOperationContext, consumerGroupId, hooks)); + try { + kafkaListenerEndpoint.setMethod( + MCLKafkaListener.class.getMethod("consume", ConsumerRecord.class)); + } catch (NoSuchMethodException e) { + throw new RuntimeException(e); + } + + return kafkaListenerEndpoint; + } + + private String buildConsumerGroupName(@Nonnull String suffix) { + if (suffix.isEmpty()) { + return consumerGroupBase; + } else { + return String.join("-", consumerGroupBase, suffix); + } + } +} diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeLogProcessor.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeLogProcessor.java deleted file mode 100644 index 6112ad798d73dc..00000000000000 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeLogProcessor.java +++ /dev/null @@ -1,140 +0,0 @@ -package com.linkedin.metadata.kafka; - -import com.codahale.metrics.Histogram; -import com.codahale.metrics.MetricRegistry; -import com.codahale.metrics.Timer; 
-import com.linkedin.gms.factory.kafka.KafkaEventConsumerFactory; -import com.linkedin.metadata.EventUtils; -import com.linkedin.metadata.kafka.config.MetadataChangeLogProcessorCondition; -import com.linkedin.metadata.kafka.hook.MetadataChangeLogHook; -import com.linkedin.metadata.kafka.hook.UpdateIndicesHook; -import com.linkedin.metadata.kafka.hook.event.EntityChangeEventGeneratorHook; -import com.linkedin.metadata.kafka.hook.form.FormAssignmentHook; -import com.linkedin.metadata.kafka.hook.incident.IncidentsSummaryHook; -import com.linkedin.metadata.kafka.hook.ingestion.IngestionSchedulerHook; -import com.linkedin.metadata.kafka.hook.siblings.SiblingAssociationHook; -import com.linkedin.metadata.utils.metrics.MetricUtils; -import com.linkedin.mxe.MetadataChangeLog; -import com.linkedin.mxe.Topics; -import io.datahubproject.metadata.context.OperationContext; -import java.util.Comparator; -import java.util.List; -import java.util.stream.Collectors; -import lombok.Getter; -import lombok.extern.slf4j.Slf4j; -import org.apache.avro.generic.GenericRecord; -import org.apache.kafka.clients.consumer.ConsumerRecord; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.beans.factory.annotation.Qualifier; -import org.springframework.context.annotation.Conditional; -import org.springframework.context.annotation.Import; -import org.springframework.kafka.annotation.EnableKafka; -import org.springframework.kafka.annotation.KafkaListener; -import org.springframework.stereotype.Component; - -@Slf4j -@Component -@Conditional(MetadataChangeLogProcessorCondition.class) -@Import({ - UpdateIndicesHook.class, - IngestionSchedulerHook.class, - EntityChangeEventGeneratorHook.class, - KafkaEventConsumerFactory.class, - SiblingAssociationHook.class, - FormAssignmentHook.class, - IncidentsSummaryHook.class, -}) -@EnableKafka -public class MetadataChangeLogProcessor { - - @Getter private final List hooks; - private final Histogram kafkaLagStats = - 
MetricUtils.get().histogram(MetricRegistry.name(this.getClass(), "kafkaLag")); - - @Autowired - public MetadataChangeLogProcessor( - @Qualifier("systemOperationContext") OperationContext systemOperationContext, - List metadataChangeLogHooks) { - this.hooks = - metadataChangeLogHooks.stream() - .filter(MetadataChangeLogHook::isEnabled) - .sorted(Comparator.comparing(MetadataChangeLogHook::executionOrder)) - .collect(Collectors.toList()); - log.info( - "Enabled hooks: {}", - this.hooks.stream() - .map(hook -> hook.getClass().getSimpleName()) - .collect(Collectors.toList())); - this.hooks.forEach(hook -> hook.init(systemOperationContext)); - } - - @KafkaListener( - id = "${METADATA_CHANGE_LOG_KAFKA_CONSUMER_GROUP_ID:generic-mae-consumer-job-client}", - topics = { - "${METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME:" + Topics.METADATA_CHANGE_LOG_VERSIONED + "}", - "${METADATA_CHANGE_LOG_TIMESERIES_TOPIC_NAME:" + Topics.METADATA_CHANGE_LOG_TIMESERIES + "}" - }, - containerFactory = "kafkaEventConsumer") - public void consume(final ConsumerRecord consumerRecord) { - try (Timer.Context i = MetricUtils.timer(this.getClass(), "consume").time()) { - kafkaLagStats.update(System.currentTimeMillis() - consumerRecord.timestamp()); - final GenericRecord record = consumerRecord.value(); - log.info( - "Got MCL event key: {}, topic: {}, partition: {}, offset: {}, value size: {}, timestamp: {}", - consumerRecord.key(), - consumerRecord.topic(), - consumerRecord.partition(), - consumerRecord.offset(), - consumerRecord.serializedValueSize(), - consumerRecord.timestamp()); - MetricUtils.counter(this.getClass(), "received_mcl_count").inc(); - - MetadataChangeLog event; - try { - event = EventUtils.avroToPegasusMCL(record); - log.debug( - "Successfully converted Avro MCL to Pegasus MCL. 
urn: {}, key: {}", - event.getEntityUrn(), - event.getEntityKeyAspect()); - } catch (Exception e) { - MetricUtils.counter(this.getClass(), "avro_to_pegasus_conversion_failure").inc(); - log.error("Error deserializing message due to: ", e); - log.error("Message: {}", record.toString()); - return; - } - - log.info( - "Invoking MCL hooks for urn: {}, aspect name: {}, entity type: {}, change type: {}", - event.getEntityUrn(), - event.hasAspectName() ? event.getAspectName() : null, - event.hasEntityType() ? event.getEntityType() : null, - event.hasChangeType() ? event.getChangeType() : null); - - // Here - plug in additional "custom processor hooks" - for (MetadataChangeLogHook hook : this.hooks) { - if (!hook.isEnabled()) { - log.info(String.format("Skipping disabled hook %s", hook.getClass())); - continue; - } - log.info( - "Invoking MCL hook {} for urn: {}", - hook.getClass().getSimpleName(), - event.getEntityUrn()); - try (Timer.Context ignored = - MetricUtils.timer(this.getClass(), hook.getClass().getSimpleName() + "_latency") - .time()) { - hook.invoke(event); - } catch (Exception e) { - // Just skip this hook and continue. - Note that this represents "at most once"// - // processing. - MetricUtils.counter(this.getClass(), hook.getClass().getSimpleName() + "_failure").inc(); - log.error( - "Failed to execute MCL hook with name {}", hook.getClass().getCanonicalName(), e); - } - } - // TODO: Manually commit kafka offsets after full processing. 
- MetricUtils.counter(this.getClass(), "consumed_mcl_count").inc(); - log.info("Successfully completed MCL hooks for urn: {}", event.getEntityUrn()); - } - } -} diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/MetadataChangeLogHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/MetadataChangeLogHook.java index 145d1ded724cc0..876df4279b7b8a 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/MetadataChangeLogHook.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/MetadataChangeLogHook.java @@ -18,13 +18,19 @@ default MetadataChangeLogHook init(@Nonnull OperationContext systemOperationCont return this; } + /** + * Suffix for the consumer group + * + * @return suffix + */ + @Nonnull + String getConsumerGroupSuffix(); + /** * Return whether the hook is enabled or not. If not enabled, the below invoke method is not * triggered */ - default boolean isEnabled() { - return true; - } + boolean isEnabled(); /** Invoke the hook when a MetadataChangeLog is received */ void invoke(@Nonnull MetadataChangeLog log) throws Exception; diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHook.java index a0e304b26ea60f..bd804b0f4424ca 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHook.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHook.java @@ -2,6 +2,7 @@ import static com.linkedin.metadata.Constants.*; +import com.google.common.annotations.VisibleForTesting; import com.linkedin.gms.factory.common.GraphServiceFactory; import com.linkedin.gms.factory.common.SystemMetadataServiceFactory; import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory; @@ -12,7 +13,9 @@ import 
com.linkedin.mxe.MetadataChangeLog; import io.datahubproject.metadata.context.OperationContext; import javax.annotation.Nonnull; +import lombok.Getter; import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Import; import org.springframework.stereotype.Component; @@ -34,15 +37,27 @@ public class UpdateIndicesHook implements MetadataChangeLogHook { private final boolean isEnabled; private final boolean reprocessUIEvents; private OperationContext systemOperationContext; + @Getter private final String consumerGroupSuffix; + @Autowired public UpdateIndicesHook( UpdateIndicesService updateIndicesService, @Nonnull @Value("${updateIndices.enabled:true}") Boolean isEnabled, @Nonnull @Value("${featureFlags.preProcessHooks.reprocessEnabled:false}") - Boolean reprocessUIEvents) { + Boolean reprocessUIEvents, + @Nonnull @Value("${updateIndices.consumerGroupSuffix}") String consumerGroupSuffix) { this.updateIndicesService = updateIndicesService; this.isEnabled = isEnabled; this.reprocessUIEvents = reprocessUIEvents; + this.consumerGroupSuffix = consumerGroupSuffix; + } + + @VisibleForTesting + public UpdateIndicesHook( + UpdateIndicesService updateIndicesService, + @Nonnull Boolean isEnabled, + @Nonnull Boolean reprocessUIEvents) { + this(updateIndicesService, isEnabled, reprocessUIEvents, ""); } @Override diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHook.java index 8dc98d77233ceb..59d068a46d8c6f 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHook.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHook.java 
@@ -1,5 +1,6 @@ package com.linkedin.metadata.kafka.hook.event; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableSet; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; @@ -29,6 +30,7 @@ import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; +import lombok.Getter; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; @@ -78,10 +80,11 @@ public class EntityChangeEventGeneratorHook implements MetadataChangeLogHook { private static final Set SUPPORTED_OPERATIONS = ImmutableSet.of("CREATE", "UPSERT", "DELETE"); - private final EntityChangeEventGeneratorRegistry _entityChangeEventGeneratorRegistry; + private final EntityChangeEventGeneratorRegistry entityChangeEventGeneratorRegistry; private final OperationContext systemOperationContext; - private final SystemEntityClient _entityClient; - private final Boolean _isEnabled; + private final SystemEntityClient entityClient; + private final Boolean isEnabled; + @Getter private final String consumerGroupSuffix; @Autowired public EntityChangeEventGeneratorHook( @@ -89,17 +92,28 @@ public EntityChangeEventGeneratorHook( @Nonnull @Qualifier("entityChangeEventGeneratorRegistry") final EntityChangeEventGeneratorRegistry entityChangeEventGeneratorRegistry, @Nonnull final SystemEntityClient entityClient, - @Nonnull @Value("${entityChangeEvents.enabled:true}") Boolean isEnabled) { + @Nonnull @Value("${entityChangeEvents.enabled:true}") Boolean isEnabled, + @Nonnull @Value("${entityChangeEvents.consumerGroupSuffix}") String consumerGroupSuffix) { this.systemOperationContext = systemOperationContext; - _entityChangeEventGeneratorRegistry = + this.entityChangeEventGeneratorRegistry = Objects.requireNonNull(entityChangeEventGeneratorRegistry); - _entityClient = Objects.requireNonNull(entityClient); - _isEnabled = 
isEnabled; + this.entityClient = Objects.requireNonNull(entityClient); + this.isEnabled = isEnabled; + this.consumerGroupSuffix = consumerGroupSuffix; + } + + @VisibleForTesting + public EntityChangeEventGeneratorHook( + @Nonnull OperationContext systemOperationContext, + @Nonnull final EntityChangeEventGeneratorRegistry entityChangeEventGeneratorRegistry, + @Nonnull final SystemEntityClient entityClient, + @Nonnull Boolean isEnabled) { + this(systemOperationContext, entityChangeEventGeneratorRegistry, entityClient, isEnabled, ""); } @Override public boolean isEnabled() { - return _isEnabled; + return isEnabled; } @Override @@ -166,7 +180,7 @@ private List generateChangeEvents( @Nonnull final Aspect to, @Nonnull AuditStamp auditStamp) { final List> entityChangeEventGenerators = - _entityChangeEventGeneratorRegistry.getEntityChangeEventGenerators(aspectName).stream() + entityChangeEventGeneratorRegistry.getEntityChangeEventGenerators(aspectName).stream() // Note: Assumes that correct types have been registered for the aspect. 
.map(changeEventGenerator -> (EntityChangeEventGenerator) changeEventGenerator) .collect(Collectors.toList()); @@ -186,7 +200,7 @@ private boolean isEligibleForProcessing(final MetadataChangeLog log) { private void emitPlatformEvent( @Nonnull final PlatformEvent event, @Nonnull final String partitioningKey) throws Exception { - _entityClient.producePlatformEvent( + entityClient.producePlatformEvent( systemOperationContext, Constants.CHANGE_EVENT_PLATFORM_EVENT_NAME, partitioningKey, event); } diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/form/FormAssignmentHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/form/FormAssignmentHook.java index 8d093fe0b8a12d..063fa6de92c838 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/form/FormAssignmentHook.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/form/FormAssignmentHook.java @@ -2,6 +2,7 @@ import static com.linkedin.metadata.Constants.*; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableSet; import com.linkedin.events.metadata.ChangeType; import com.linkedin.form.DynamicFormAssignment; @@ -15,6 +16,7 @@ import java.util.Objects; import java.util.Set; import javax.annotation.Nonnull; +import lombok.Getter; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; @@ -53,17 +55,25 @@ public class FormAssignmentHook implements MetadataChangeLogHook { ImmutableSet.of( ChangeType.UPSERT, ChangeType.CREATE, ChangeType.CREATE_ENTITY, ChangeType.RESTATE); - private final FormService _formService; - private final boolean _isEnabled; + private final FormService formService; + private final boolean isEnabled; private OperationContext systemOperationContext; + @Getter private final String consumerGroupSuffix; @Autowired public 
FormAssignmentHook( @Nonnull final FormService formService, - @Nonnull @Value("${forms.hook.enabled:true}") Boolean isEnabled) { - _formService = Objects.requireNonNull(formService, "formService is required"); - _isEnabled = isEnabled; + @Nonnull @Value("${forms.hook.enabled:true}") Boolean isEnabled, + @Nonnull @Value("${forms.hook.consumerGroupSuffix}") String consumerGroupSuffix) { + this.formService = Objects.requireNonNull(formService, "formService is required"); + this.isEnabled = isEnabled; + this.consumerGroupSuffix = consumerGroupSuffix; + } + + @VisibleForTesting + public FormAssignmentHook(@Nonnull final FormService formService, @Nonnull Boolean isEnabled) { + this(formService, isEnabled, ""); } @Override @@ -74,12 +84,12 @@ public FormAssignmentHook init(@Nonnull OperationContext systemOperationContext) @Override public boolean isEnabled() { - return _isEnabled; + return isEnabled; } @Override public void invoke(@Nonnull final MetadataChangeLog event) { - if (_isEnabled && isEligibleForProcessing(event)) { + if (isEnabled && isEligibleForProcessing(event)) { if (isFormDynamicFilterUpdated(event)) { handleFormFilterUpdated(event); } @@ -96,7 +106,7 @@ private void handleFormFilterUpdated(@Nonnull final MetadataChangeLog event) { DynamicFormAssignment.class); // 2. Register a automation to assign it. 
- _formService.upsertFormAssignmentRunner( + formService.upsertFormAssignmentRunner( systemOperationContext, event.getEntityUrn(), formFilters); } diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/incident/IncidentsSummaryHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/incident/IncidentsSummaryHook.java index 7c03a11a81f7ac..5483fed9116e17 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/incident/IncidentsSummaryHook.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/incident/IncidentsSummaryHook.java @@ -2,6 +2,7 @@ import static com.linkedin.metadata.Constants.*; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableSet; import com.linkedin.common.IncidentSummaryDetails; import com.linkedin.common.IncidentSummaryDetailsArray; @@ -27,6 +28,7 @@ import java.util.Objects; import java.util.Set; import javax.annotation.Nonnull; +import lombok.Getter; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; @@ -57,20 +59,31 @@ public class IncidentsSummaryHook implements MetadataChangeLogHook { ImmutableSet.of(INCIDENT_INFO_ASPECT_NAME, STATUS_ASPECT_NAME); private OperationContext systemOperationContext; - private final IncidentService _incidentService; - private final boolean _isEnabled; + private final IncidentService incidentService; + private final boolean isEnabled; + @Getter private final String consumerGroupSuffix; /** Max number of incidents to allow in incident summary, limited to prevent HTTP errors */ - private final int _maxIncidentHistory; + private final int maxIncidentHistory; @Autowired public IncidentsSummaryHook( @Nonnull final IncidentService incidentService, - @Nonnull @Value("${incidents.hook.enabled:true}") Boolean isEnabled, - @Nonnull 
@Value("${incidents.hook.maxIncidentHistory:100}") Integer maxIncidentHistory) { - _incidentService = Objects.requireNonNull(incidentService, "incidentService is required"); - _isEnabled = isEnabled; - _maxIncidentHistory = maxIncidentHistory; + @Nonnull @Value("${incidents.hook.enabled}") Boolean isEnabled, + @Nonnull @Value("${incidents.hook.maxIncidentHistory}") Integer maxIncidentHistory, + @Nonnull @Value("${incidents.hook.consumerGroupSuffix}") String consumerGroupSuffix) { + this.incidentService = Objects.requireNonNull(incidentService, "incidentService is required"); + this.isEnabled = isEnabled; + this.maxIncidentHistory = maxIncidentHistory; + this.consumerGroupSuffix = consumerGroupSuffix; + } + + @VisibleForTesting + public IncidentsSummaryHook( + @Nonnull final IncidentService incidentService, + @Nonnull Boolean isEnabled, + @Nonnull Integer maxIncidentHistory) { + this(incidentService, isEnabled, maxIncidentHistory, ""); } @Override @@ -81,12 +94,12 @@ public IncidentsSummaryHook init(@Nonnull OperationContext systemOperationContex @Override public boolean isEnabled() { - return _isEnabled; + return isEnabled; } @Override public void invoke(@Nonnull final MetadataChangeLog event) { - if (_isEnabled && isEligibleForProcessing(event)) { + if (isEnabled && isEligibleForProcessing(event)) { log.debug("Urn {} received by Incident Summary Hook.", event.getEntityUrn()); final Urn urn = HookUtils.getUrnFromEvent(event, systemOperationContext.getEntityRegistry()); // Handle the deletion case. @@ -104,7 +117,7 @@ public void invoke(@Nonnull final MetadataChangeLog event) { private void handleIncidentSoftDeleted(@Nonnull final Urn incidentUrn) { // 1. Fetch incident info. IncidentInfo incidentInfo = - _incidentService.getIncidentInfo(systemOperationContext, incidentUrn); + incidentService.getIncidentInfo(systemOperationContext, incidentUrn); // 2. Retrieve associated urns. 
if (incidentInfo != null) { @@ -127,7 +140,7 @@ private void handleIncidentSoftDeleted(@Nonnull final Urn incidentUrn) { private void handleIncidentUpdated(@Nonnull final Urn incidentUrn) { // 1. Fetch incident info + status IncidentInfo incidentInfo = - _incidentService.getIncidentInfo(systemOperationContext, incidentUrn); + incidentService.getIncidentInfo(systemOperationContext, incidentUrn); // 2. Retrieve associated urns. if (incidentInfo != null) { @@ -179,14 +192,14 @@ private void addIncidentToSummary( IncidentsSummaryUtils.removeIncidentFromResolvedSummary(incidentUrn, summary); // Then, add to active. - IncidentsSummaryUtils.addIncidentToActiveSummary(details, summary, _maxIncidentHistory); + IncidentsSummaryUtils.addIncidentToActiveSummary(details, summary, maxIncidentHistory); } else if (IncidentState.RESOLVED.equals(status.getState())) { // First, ensure this isn't in any summaries anymore. IncidentsSummaryUtils.removeIncidentFromActiveSummary(incidentUrn, summary); // Then, add to resolved. - IncidentsSummaryUtils.addIncidentToResolvedSummary(details, summary, _maxIncidentHistory); + IncidentsSummaryUtils.addIncidentToResolvedSummary(details, summary, maxIncidentHistory); } // 3. Emit the change back! @@ -196,7 +209,7 @@ private void addIncidentToSummary( @Nonnull private IncidentsSummary getIncidentsSummary(@Nonnull final Urn entityUrn) { IncidentsSummary maybeIncidentsSummary = - _incidentService.getIncidentsSummary(systemOperationContext, entityUrn); + incidentService.getIncidentsSummary(systemOperationContext, entityUrn); return maybeIncidentsSummary == null ? 
new IncidentsSummary() .setResolvedIncidentDetails(new IncidentSummaryDetailsArray()) @@ -260,7 +273,7 @@ private boolean isIncidentUpdate(@Nonnull final MetadataChangeLog event) { private void updateIncidentSummary( @Nonnull final Urn entityUrn, @Nonnull final IncidentsSummary newSummary) { try { - _incidentService.updateIncidentsSummary(systemOperationContext, entityUrn, newSummary); + incidentService.updateIncidentsSummary(systemOperationContext, entityUrn, newSummary); } catch (Exception e) { log.error( String.format( diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/ingestion/IngestionSchedulerHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/ingestion/IngestionSchedulerHook.java index c13f0f75708f74..5569fade7e6eb1 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/ingestion/IngestionSchedulerHook.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/ingestion/IngestionSchedulerHook.java @@ -15,6 +15,7 @@ import com.linkedin.mxe.MetadataChangeLog; import io.datahubproject.metadata.context.OperationContext; import javax.annotation.Nonnull; +import lombok.Getter; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; @@ -29,27 +30,36 @@ @Component @Import({EntityRegistryFactory.class, IngestionSchedulerFactory.class}) public class IngestionSchedulerHook implements MetadataChangeLogHook { - private final IngestionScheduler _scheduler; - private final boolean _isEnabled; + private final IngestionScheduler scheduler; + private final boolean isEnabled; private OperationContext systemOperationContext; + @Getter private final String consumerGroupSuffix; @Autowired public IngestionSchedulerHook( @Nonnull final IngestionScheduler scheduler, - @Nonnull @Value("${ingestionScheduler.enabled:true}") Boolean isEnabled) { - _scheduler = 
scheduler; - _isEnabled = isEnabled; + @Nonnull @Value("${ingestionScheduler.enabled:true}") Boolean isEnabled, + @Nonnull @Value("${ingestionScheduler.consumerGroupSuffix}") String consumerGroupSuffix) { + this.scheduler = scheduler; + this.isEnabled = isEnabled; + this.consumerGroupSuffix = consumerGroupSuffix; + } + + @VisibleForTesting + public IngestionSchedulerHook( + @Nonnull final IngestionScheduler scheduler, @Nonnull Boolean isEnabled) { + this(scheduler, isEnabled, ""); } @Override public boolean isEnabled() { - return _isEnabled; + return isEnabled; } @Override public IngestionSchedulerHook init(@Nonnull OperationContext systemOperationContext) { this.systemOperationContext = systemOperationContext; - _scheduler.init(); + scheduler.init(); return this; } @@ -66,11 +76,11 @@ public void invoke(@Nonnull MetadataChangeLog event) { final Urn urn = getUrnFromEvent(event); if (ChangeType.DELETE.equals(event.getChangeType())) { - _scheduler.unscheduleNextIngestionSourceExecution(urn); + scheduler.unscheduleNextIngestionSourceExecution(urn); } else { // Update the scheduler to reflect the latest changes. 
final DataHubIngestionSourceInfo info = getInfoFromEvent(event); - _scheduler.scheduleNextIngestionSourceExecution(urn, info); + scheduler.scheduleNextIngestionSourceExecution(urn, info); } } } @@ -138,6 +148,6 @@ private DataHubIngestionSourceInfo getInfoFromEvent(final MetadataChangeLog even @VisibleForTesting IngestionScheduler scheduler() { - return _scheduler; + return scheduler; } } diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java index f068679da7757e..bbe0feed7de115 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java @@ -41,6 +41,7 @@ import java.util.List; import java.util.stream.Collectors; import javax.annotation.Nonnull; +import lombok.Getter; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; @@ -70,17 +71,28 @@ public class SiblingAssociationHook implements MetadataChangeLogHook { private final SystemEntityClient systemEntityClient; private final EntitySearchService entitySearchService; - private final boolean _isEnabled; + private final boolean isEnabled; private OperationContext systemOperationContext; + @Getter private final String consumerGroupSuffix; @Autowired public SiblingAssociationHook( @Nonnull final SystemEntityClient systemEntityClient, @Nonnull final EntitySearchService searchService, - @Nonnull @Value("${siblings.enabled:true}") Boolean isEnabled) { + @Nonnull @Value("${siblings.enabled:true}") Boolean isEnabled, + @Nonnull @Value("${siblings.consumerGroupSuffix}") String consumerGroupSuffix) { this.systemEntityClient = systemEntityClient; entitySearchService = searchService; - _isEnabled = 
isEnabled; + this.isEnabled = isEnabled; + this.consumerGroupSuffix = consumerGroupSuffix; + } + + @VisibleForTesting + public SiblingAssociationHook( + @Nonnull final SystemEntityClient systemEntityClient, + @Nonnull final EntitySearchService searchService, + @Nonnull Boolean isEnabled) { + this(systemEntityClient, searchService, isEnabled, ""); } @Value("${siblings.enabled:false}") @@ -99,7 +111,7 @@ public SiblingAssociationHook init(@Nonnull OperationContext systemOperationCont @Override public boolean isEnabled() { - return _isEnabled; + return isEnabled; } @Override diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java index 411fe02260bb1b..4cd59992eb2f00 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java @@ -125,7 +125,8 @@ public void setupTest() { mockTimeseriesAspectService, mockSystemMetadataService, searchDocumentTransformer, - mockEntityIndexBuilders); + mockEntityIndexBuilders, + "MD5"); OperationContext systemOperationContext = TestOperationContexts.systemContextNoSearchAuthorization(); @@ -235,7 +236,8 @@ public void testInputFieldsEdgesAreAdded() throws Exception { mockTimeseriesAspectService, mockSystemMetadataService, searchDocumentTransformer, - mockEntityIndexBuilders); + mockEntityIndexBuilders, + "MD5"); updateIndicesHook = new UpdateIndicesHook(updateIndicesService, true, false); updateIndicesHook.init( diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLGMSSpringTest.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLGMSSpringTest.java index c2a8de161eafe1..10f149e6062957 100644 --- 
a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLGMSSpringTest.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLGMSSpringTest.java @@ -3,7 +3,7 @@ import static org.testng.AssertJUnit.*; import com.linkedin.gms.factory.config.ConfigurationProvider; -import com.linkedin.metadata.kafka.MetadataChangeLogProcessor; +import com.linkedin.metadata.kafka.MCLKafkaListenerRegistrar; import com.linkedin.metadata.kafka.hook.UpdateIndicesHook; import com.linkedin.metadata.kafka.hook.event.EntityChangeEventGeneratorHook; import com.linkedin.metadata.kafka.hook.incident.IncidentsSummaryHook; @@ -35,23 +35,23 @@ public class MCLGMSSpringTest extends AbstractTestNGSpringContextTests { @Test public void testHooks() { - MetadataChangeLogProcessor metadataChangeLogProcessor = - applicationContext.getBean(MetadataChangeLogProcessor.class); + MCLKafkaListenerRegistrar registrar = + applicationContext.getBean(MCLKafkaListenerRegistrar.class); assertTrue( - metadataChangeLogProcessor.getHooks().stream() + registrar.getMetadataChangeLogHooks().stream() .noneMatch(hook -> hook instanceof IngestionSchedulerHook)); assertTrue( - metadataChangeLogProcessor.getHooks().stream() + registrar.getMetadataChangeLogHooks().stream() .anyMatch(hook -> hook instanceof UpdateIndicesHook)); assertTrue( - metadataChangeLogProcessor.getHooks().stream() + registrar.getMetadataChangeLogHooks().stream() .anyMatch(hook -> hook instanceof SiblingAssociationHook)); assertTrue( - metadataChangeLogProcessor.getHooks().stream() + registrar.getMetadataChangeLogHooks().stream() .anyMatch(hook -> hook instanceof EntityChangeEventGeneratorHook)); assertEquals( 1, - metadataChangeLogProcessor.getHooks().stream() + registrar.getMetadataChangeLogHooks().stream() .filter(hook -> hook instanceof IncidentsSummaryHook) .count()); } diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLMAESpringTest.java 
b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLMAESpringTest.java index 23de7707cc571d..2049e974999b18 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLMAESpringTest.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLMAESpringTest.java @@ -4,7 +4,7 @@ import static org.testng.AssertJUnit.assertTrue; import com.linkedin.gms.factory.config.ConfigurationProvider; -import com.linkedin.metadata.kafka.MetadataChangeLogProcessor; +import com.linkedin.metadata.kafka.MCLKafkaListenerRegistrar; import com.linkedin.metadata.kafka.hook.UpdateIndicesHook; import com.linkedin.metadata.kafka.hook.event.EntityChangeEventGeneratorHook; import com.linkedin.metadata.kafka.hook.incident.IncidentsSummaryHook; @@ -33,23 +33,23 @@ public class MCLMAESpringTest extends AbstractTestNGSpringContextTests { @Test public void testHooks() { - MetadataChangeLogProcessor metadataChangeLogProcessor = - applicationContext.getBean(MetadataChangeLogProcessor.class); + MCLKafkaListenerRegistrar registrar = + applicationContext.getBean(MCLKafkaListenerRegistrar.class); assertTrue( - metadataChangeLogProcessor.getHooks().stream() + registrar.getMetadataChangeLogHooks().stream() .noneMatch(hook -> hook instanceof IngestionSchedulerHook)); assertTrue( - metadataChangeLogProcessor.getHooks().stream() + registrar.getMetadataChangeLogHooks().stream() .anyMatch(hook -> hook instanceof UpdateIndicesHook)); assertTrue( - metadataChangeLogProcessor.getHooks().stream() + registrar.getMetadataChangeLogHooks().stream() .anyMatch(hook -> hook instanceof SiblingAssociationHook)); assertTrue( - metadataChangeLogProcessor.getHooks().stream() + registrar.getMetadataChangeLogHooks().stream() .anyMatch(hook -> hook instanceof EntityChangeEventGeneratorHook)); assertEquals( 1, - metadataChangeLogProcessor.getHooks().stream() + registrar.getMetadataChangeLogHooks().stream() .filter(hook -> hook 
instanceof IncidentsSummaryHook) .count()); } diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java index f6f71a12a6951f..68768051eccad0 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java @@ -34,10 +34,13 @@ @ComponentScan( basePackages = { "com.linkedin.metadata.kafka", - "com.linkedin.gms.factory.kafka.common", - "com.linkedin.gms.factory.kafka.schemaregistry", + "com.linkedin.gms.factory.kafka", "com.linkedin.gms.factory.entity.update.indices", - "com.linkedin.gms.factory.timeline.eventgenerator" + "com.linkedin.gms.factory.timeline.eventgenerator", + "com.linkedin.metadata.dao.producer", + "com.linkedin.gms.factory.change", + "com.datahub.event.hook", + "com.linkedin.gms.factory.notifications" }) public class MCLSpringCommonTestConfiguration { diff --git a/metadata-jobs/pe-consumer/src/main/java/com/datahub/event/PlatformEventProcessor.java b/metadata-jobs/pe-consumer/src/main/java/com/datahub/event/PlatformEventProcessor.java index c4116b314254ca..358a2ac0c2ee33 100644 --- a/metadata-jobs/pe-consumer/src/main/java/com/datahub/event/PlatformEventProcessor.java +++ b/metadata-jobs/pe-consumer/src/main/java/com/datahub/event/PlatformEventProcessor.java @@ -3,9 +3,7 @@ import com.codahale.metrics.Histogram; import com.codahale.metrics.MetricRegistry; import com.codahale.metrics.Timer; -import com.datahub.event.hook.BusinessAttributeUpdateHook; import com.datahub.event.hook.PlatformEventHook; -import com.linkedin.gms.factory.kafka.KafkaEventConsumerFactory; import com.linkedin.metadata.EventUtils; import com.linkedin.metadata.utils.metrics.MetricUtils; import 
com.linkedin.mxe.PlatformEvent; @@ -21,7 +19,6 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Conditional; -import org.springframework.context.annotation.Import; import org.springframework.kafka.annotation.EnableKafka; import org.springframework.kafka.annotation.KafkaListener; import org.springframework.stereotype.Component; @@ -29,7 +26,6 @@ @Slf4j @Component @Conditional(PlatformEventProcessorCondition.class) -@Import({BusinessAttributeUpdateHook.class, KafkaEventConsumerFactory.class}) @EnableKafka public class PlatformEventProcessor { @@ -49,6 +45,11 @@ public PlatformEventProcessor( platformEventHooks.stream() .filter(PlatformEventHook::isEnabled) .collect(Collectors.toList()); + log.info( + "Enabled platform hooks: {}", + this.hooks.stream() + .map(hook -> hook.getClass().getSimpleName()) + .collect(Collectors.toList())); this.hooks.forEach(PlatformEventHook::init); } diff --git a/metadata-jobs/pe-consumer/src/main/java/com/datahub/event/hook/PlatformEventHook.java b/metadata-jobs/pe-consumer/src/main/java/com/datahub/event/hook/PlatformEventHook.java index 37241861f2e5e6..7fcc2a07b950bf 100644 --- a/metadata-jobs/pe-consumer/src/main/java/com/datahub/event/hook/PlatformEventHook.java +++ b/metadata-jobs/pe-consumer/src/main/java/com/datahub/event/hook/PlatformEventHook.java @@ -20,9 +20,7 @@ default void init() {} * Return whether the hook is enabled or not. 
If not enabled, the below invoke method is not * triggered */ - default boolean isEnabled() { - return true; - } + boolean isEnabled(); /** Invoke the hook when a PlatformEvent is received */ void invoke(@Nonnull OperationContext opContext, @Nonnull PlatformEvent event); diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataset/EditableDatasetProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/dataset/EditableDatasetProperties.pdl index 939231d2d4738d..48e0bfb26227fb 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/dataset/EditableDatasetProperties.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/dataset/EditableDatasetProperties.pdl @@ -19,4 +19,13 @@ record EditableDatasetProperties includes ChangeAuditStamps { "fieldName": "editedDescription", } description: optional string + + /** + * Editable display name of the Dataset + */ + @Searchable = { + "fieldType": "TEXT_PARTIAL", + "fieldName": "editedName", + } + name: optional string } diff --git a/metadata-models/src/main/pegasus/com/linkedin/settings/global/GlobalSettingsInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/settings/global/GlobalSettingsInfo.pdl index 8d4121b767dc38..6c6f4d0036ce03 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/settings/global/GlobalSettingsInfo.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/settings/global/GlobalSettingsInfo.pdl @@ -12,16 +12,17 @@ record GlobalSettingsInfo { * SSO integrations between DataHub and identity providers */ sso: optional SsoSettings + /** * Settings related to the Views Feature */ views: optional GlobalViewsSettings + /** * Settings related to the documentation propagation feature */ - docPropagation: DocPropagationFeatureSettings = { + docPropagation: optional DocPropagationFeatureSettings = { "enabled": true "columnPropagationEnabled": true } - } \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/test/TestResult.pdl 
b/metadata-models/src/main/pegasus/com/linkedin/test/TestResult.pdl index 11c4aa944fa4f4..5f90f038d44756 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/test/TestResult.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/test/TestResult.pdl @@ -1,6 +1,7 @@ namespace com.linkedin.test import com.linkedin.common.Urn +import com.linkedin.common.AuditStamp /** * Information about a Test Result @@ -24,4 +25,15 @@ record TestResult { */ FAILURE } -} \ No newline at end of file + + /** + * The md5 of the test definition that was used to compute this result. + * See TestInfo.testDefinition.md5 for more information. + */ + testDefinitionMd5: optional string + + /** + * The audit stamp of when the result was computed, including the actor who computed it. + */ + lastComputed: optional AuditStamp +} diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/SearchContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/SearchContext.java index c067e91c3524cf..5ad7bdc14820c3 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/SearchContext.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/SearchContext.java @@ -21,7 +21,7 @@ public class SearchContext implements ContextInterface { public static SearchContext EMPTY = - SearchContext.builder().indexConvention(IndexConventionImpl.NO_PREFIX).build(); + SearchContext.builder().indexConvention(IndexConventionImpl.noPrefix("")).build(); public static SearchContext withFlagDefaults( @Nonnull SearchContext searchContext, diff --git a/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java b/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java index e54c040fe13b58..76f58fb4751085 100644 --- 
a/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java @@ -191,7 +191,7 @@ public static OperationContext systemContext( IndexConvention indexConvention = Optional.ofNullable(indexConventionSupplier) .map(Supplier::get) - .orElse(IndexConventionImpl.NO_PREFIX); + .orElse(IndexConventionImpl.noPrefix("MD5")); ServicesRegistryContext servicesRegistryContext = Optional.ofNullable(servicesRegistrySupplier).orElse(() -> null).get(); diff --git a/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/SearchContextTest.java b/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/SearchContextTest.java index 4858bb342258a5..2e0585cc82a4fd 100644 --- a/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/SearchContextTest.java +++ b/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/SearchContextTest.java @@ -12,26 +12,26 @@ public class SearchContextTest { @Test public void searchContextId() { SearchContext testNoFlags = - SearchContext.builder().indexConvention(IndexConventionImpl.NO_PREFIX).build(); + SearchContext.builder().indexConvention(IndexConventionImpl.noPrefix("MD5")).build(); assertEquals( testNoFlags.getCacheKeyComponent(), SearchContext.builder() - .indexConvention(IndexConventionImpl.NO_PREFIX) + .indexConvention(IndexConventionImpl.noPrefix("MD5")) .build() .getCacheKeyComponent(), "Expected consistent context ids across instances"); SearchContext testWithFlags = SearchContext.builder() - .indexConvention(IndexConventionImpl.NO_PREFIX) + .indexConvention(IndexConventionImpl.noPrefix("MD5")) .searchFlags(new SearchFlags().setFulltext(true)) .build(); assertEquals( testWithFlags.getCacheKeyComponent(), SearchContext.builder() - .indexConvention(IndexConventionImpl.NO_PREFIX) + 
.indexConvention(IndexConventionImpl.noPrefix("MD5")) .searchFlags(new SearchFlags().setFulltext(true)) .build() .getCacheKeyComponent(), @@ -44,7 +44,7 @@ public void searchContextId() { assertNotEquals( testWithFlags.getCacheKeyComponent(), SearchContext.builder() - .indexConvention(IndexConventionImpl.NO_PREFIX) + .indexConvention(IndexConventionImpl.noPrefix("MD5")) .searchFlags(new SearchFlags().setFulltext(true).setIncludeRestricted(true)) .build() .getCacheKeyComponent(), @@ -53,7 +53,7 @@ public void searchContextId() { assertNotEquals( testNoFlags.getCacheKeyComponent(), SearchContext.builder() - .indexConvention(new IndexConventionImpl("Some Prefix")) + .indexConvention(new IndexConventionImpl("Some Prefix", "MD5")) .searchFlags(null) .build() .getCacheKeyComponent(), @@ -61,7 +61,7 @@ public void searchContextId() { assertNotEquals( SearchContext.builder() - .indexConvention(IndexConventionImpl.NO_PREFIX) + .indexConvention(IndexConventionImpl.noPrefix("MD5")) .searchFlags( new SearchFlags() .setFulltext(false) @@ -70,7 +70,7 @@ public void searchContextId() { .build() .getCacheKeyComponent(), SearchContext.builder() - .indexConvention(IndexConventionImpl.NO_PREFIX) + .indexConvention(IndexConventionImpl.noPrefix("MD5")) .searchFlags(new SearchFlags().setFulltext(true).setIncludeRestricted(true)) .build() .getCacheKeyComponent(), @@ -80,7 +80,7 @@ public void searchContextId() { @Test public void testImmutableSearchFlags() { SearchContext initial = - SearchContext.builder().indexConvention(IndexConventionImpl.NO_PREFIX).build(); + SearchContext.builder().indexConvention(IndexConventionImpl.noPrefix("MD5")).build(); assertEquals(initial.getSearchFlags(), new SearchFlags().setSkipCache(false)); SearchContext mutated = initial.withFlagDefaults(flags -> flags.setSkipCache(true)); diff --git a/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java 
b/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java index 71eaca71a3641a..de2582af00a932 100644 --- a/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java +++ b/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java @@ -123,7 +123,7 @@ CompletableFuture> generateSessionTokenForUser( try { bodyJson = mapper.readTree(jsonStr); } catch (JsonProcessingException e) { - log.error("Failed to parse json while attempting to generate session token {}", jsonStr, e); + log.error("Failed to parse json while attempting to generate session token ", e); return CompletableFuture.completedFuture(new ResponseEntity<>(HttpStatus.BAD_REQUEST)); } if (bodyJson == null) { @@ -238,7 +238,7 @@ CompletableFuture> signUp(final HttpEntity httpEn try { Urn inviteTokenUrn = _inviteTokenService.getInviteTokenUrn(inviteTokenString); if (!_inviteTokenService.isInviteTokenValid(systemOperationContext, inviteTokenUrn)) { - log.error("Invalid invite token {}", inviteTokenString); + log.error("Invalid invite token"); return new ResponseEntity<>(HttpStatus.BAD_REQUEST); } @@ -386,7 +386,7 @@ CompletableFuture> track(final HttpEntity httpEnt try { bodyJson = mapper.readTree(jsonStr); } catch (JsonProcessingException e) { - log.error("Failed to parse json while attempting to track analytics event {}", jsonStr); + log.error("Failed to parse json while attempting to track analytics event", e); return CompletableFuture.completedFuture(new ResponseEntity<>(HttpStatus.BAD_REQUEST)); } if (bodyJson == null) { diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/ElasticSearchConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/ElasticSearchConfiguration.java index 130620a9ab918c..7d68e18940401e 100644 --- 
a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/ElasticSearchConfiguration.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/ElasticSearchConfiguration.java @@ -8,4 +8,5 @@ public class ElasticSearchConfiguration { private BuildIndicesConfiguration buildIndices; public String implementation; private SearchConfiguration search; + private String idHashAlgo; } diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index 921c2b43dc36c6..5b3673ddca52c6 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -174,6 +174,7 @@ elasticsearch: opensearchUseAwsIamAuth: ${OPENSEARCH_USE_AWS_IAM_AUTH:false} region: ${AWS_REGION:#{null}} implementation: ${ELASTICSEARCH_IMPLEMENTATION:elasticsearch} # elasticsearch or opensearch, for handling divergent cases + idHashAlgo: ${ELASTIC_ID_HASH_ALGO:MD5} sslContext: # Required if useSSL is true protocol: ${ELASTICSEARCH_SSL_PROTOCOL:#{null}} secureRandomImplementation: ${ELASTICSEARCH_SSL_SECURE_RANDOM_IMPL:#{null}} @@ -295,10 +296,18 @@ metadataTests: siblings: enabled: ${ENABLE_SIBLING_HOOK:true} # enable to turn on automatic sibling associations for dbt + consumerGroupSuffix: ${SIBLINGS_HOOK_CONSUMER_GROUP_SUFFIX:} updateIndices: enabled: ${ENABLE_UPDATE_INDICES_HOOK:true} + consumerGroupSuffix: ${UPDATE_INDICES_CONSUMER_GROUP_SUFFIX:} ingestionScheduler: enabled: ${ENABLE_INGESTION_SCHEDULER_HOOK:true} # enable to execute ingestion scheduling + consumerGroupSuffix: ${INGESTION_SCHEDULER_HOOK_CONSUMER_GROUP_SUFFIX:} +incidents: + hook: + enabled: ${ENABLE_INCIDENTS_HOOK:true} + maxIncidentHistory: ${MAX_INCIDENT_HISTORY:100} + consumerGroupSuffix: ${INCIDENTS_HOOK_CONSUMER_GROUP_SUFFIX:} bootstrap: upgradeDefaultBrowsePaths: @@ -375,6 +384,7 @@ featureFlags: 
entityChangeEvents: enabled: ${ENABLE_ENTITY_CHANGE_EVENTS_HOOK:true} + consumerGroupSuffix: ${ECE_CONSUMER_GROUP_SUFFIX:} views: enabled: ${VIEWS_ENABLED:true} @@ -459,6 +469,7 @@ springdoc.api-docs.groups.enabled: true forms: hook: enabled: { $FORMS_HOOK_ENABLED:true } + consumerGroupSuffix: ${FORMS_HOOK_CONSUMER_GROUP_SUFFIX:} businessAttribute: fetchRelatedEntitiesCount: ${BUSINESS_ATTRIBUTE_RELATED_ENTITIES_COUNT:20000} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchGraphServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchGraphServiceFactory.java index eb56e8d42c158e..55eb931625fecc 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchGraphServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchGraphServiceFactory.java @@ -11,6 +11,7 @@ import javax.annotation.Nonnull; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; @@ -30,7 +31,8 @@ public class ElasticSearchGraphServiceFactory { @Bean(name = "elasticSearchGraphService") @Nonnull - protected ElasticSearchGraphService getInstance() { + protected ElasticSearchGraphService getInstance( + @Value("${elasticsearch.idHashAlgo}") final String idHashAlgo) { LineageRegistry lineageRegistry = new LineageRegistry(entityRegistry); return new ElasticSearchGraphService( lineageRegistry, @@ -45,6 +47,7 @@ protected ElasticSearchGraphService getInstance() { lineageRegistry, components.getIndexConvention(), configurationProvider.getElasticSearch().getSearch().getGraph()), - components.getIndexBuilder()); + 
components.getIndexBuilder(), + idHashAlgo); } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchSystemMetadataServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchSystemMetadataServiceFactory.java index d560fba399f340..fb48d64ce7ba9e 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchSystemMetadataServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchSystemMetadataServiceFactory.java @@ -6,6 +6,7 @@ import javax.annotation.Nonnull; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; @@ -19,7 +20,8 @@ public class ElasticSearchSystemMetadataServiceFactory { @Bean(name = "elasticSearchSystemMetadataService") @Nonnull - protected ElasticSearchSystemMetadataService getInstance() { + protected ElasticSearchSystemMetadataService getInstance( + @Value("${elasticsearch.idHashAlgo}") final String elasticIdHashAlgo) { return new ElasticSearchSystemMetadataService( components.getBulkProcessor(), components.getIndexConvention(), @@ -28,6 +30,7 @@ protected ElasticSearchSystemMetadataService getInstance() { components.getIndexConvention(), components.getBulkProcessor(), components.getNumRetries()), - components.getIndexBuilder()); + components.getIndexBuilder(), + elasticIdHashAlgo); } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/IndexConventionFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/IndexConventionFactory.java index 5b76a3f2cb833f..2288c8d4ecd50d 100644 --- 
a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/IndexConventionFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/IndexConventionFactory.java @@ -19,7 +19,8 @@ public class IndexConventionFactory { private String indexPrefix; @Bean(name = INDEX_CONVENTION_BEAN) - protected IndexConvention createInstance() { - return new IndexConventionImpl(indexPrefix); + protected IndexConvention createInstance( + @Value("${elasticsearch.idHashAlgo}") final String isHashAlgo) { + return new IndexConventionImpl(indexPrefix, isHashAlgo); } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/update/indices/UpdateIndicesServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/update/indices/UpdateIndicesServiceFactory.java index fad9d0eaf3b45c..38a344f8be8e92 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/update/indices/UpdateIndicesServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/update/indices/UpdateIndicesServiceFactory.java @@ -9,6 +9,7 @@ import com.linkedin.metadata.service.UpdateIndicesService; import com.linkedin.metadata.systemmetadata.SystemMetadataService; import com.linkedin.metadata.timeseries.TimeseriesAspectService; +import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; @@ -30,7 +31,8 @@ public UpdateIndicesService searchIndicesServiceNonGMS( TimeseriesAspectService timeseriesAspectService, SystemMetadataService systemMetadataService, SearchDocumentTransformer searchDocumentTransformer, - EntityIndexBuilders entityIndexBuilders) { + EntityIndexBuilders entityIndexBuilders, + @Value("${elasticsearch.idHashAlgo}") final String idHashAlgo) { return new 
UpdateIndicesService( graphService, @@ -38,7 +40,8 @@ public UpdateIndicesService searchIndicesServiceNonGMS( timeseriesAspectService, systemMetadataService, searchDocumentTransformer, - entityIndexBuilders); + entityIndexBuilders, + idHashAlgo); } @Bean @@ -50,7 +53,8 @@ public UpdateIndicesService searchIndicesServiceGMS( final SystemMetadataService systemMetadataService, final SearchDocumentTransformer searchDocumentTransformer, final EntityIndexBuilders entityIndexBuilders, - final EntityService entityService) { + final EntityService entityService, + @Value("${elasticsearch.idHashAlgo}") final String idHashAlgo) { UpdateIndicesService updateIndicesService = new UpdateIndicesService( @@ -59,7 +63,8 @@ public UpdateIndicesService searchIndicesServiceGMS( timeseriesAspectService, systemMetadataService, searchDocumentTransformer, - entityIndexBuilders); + entityIndexBuilders, + idHashAlgo); entityService.setUpdateIndicesService(updateIndicesService); diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/EntityRegistryFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/EntityRegistryFactory.java index 2c65eeafe063bc..6ef13716aaac8c 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/EntityRegistryFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/EntityRegistryFactory.java @@ -1,15 +1,22 @@ package com.linkedin.gms.factory.entityregistry; +import com.datahub.plugins.metadata.aspect.SpringPluginFactory; +import com.linkedin.gms.factory.plugins.SpringStandardPluginConfiguration; +import com.linkedin.metadata.aspect.plugins.PluginFactory; +import com.linkedin.metadata.aspect.plugins.config.PluginConfiguration; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistry; import 
com.linkedin.metadata.models.registry.EntityRegistryException; import com.linkedin.metadata.models.registry.MergedEntityRegistry; import com.linkedin.metadata.models.registry.PluginEntityRegistryLoader; import com.linkedin.metadata.models.registry.SnapshotEntityRegistry; +import java.util.List; +import java.util.function.BiFunction; import javax.annotation.Nonnull; import lombok.SneakyThrows; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.context.ApplicationContext; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; @@ -27,13 +34,20 @@ public class EntityRegistryFactory { @Qualifier("pluginEntityRegistry") private PluginEntityRegistryLoader pluginEntityRegistryLoader; + @Autowired private ApplicationContext applicationContext; + @SneakyThrows @Bean("entityRegistry") @Primary @Nonnull - protected EntityRegistry getInstance() throws EntityRegistryException { + protected EntityRegistry getInstance( + SpringStandardPluginConfiguration springStandardPluginConfiguration) + throws EntityRegistryException { + BiFunction, PluginFactory> pluginFactoryProvider = + (config, loaders) -> new SpringPluginFactory(applicationContext, config, loaders); MergedEntityRegistry baseEntityRegistry = - new MergedEntityRegistry(SnapshotEntityRegistry.getInstance()).apply(configEntityRegistry); + new MergedEntityRegistry(new SnapshotEntityRegistry(pluginFactoryProvider)) + .apply(configEntityRegistry); pluginEntityRegistryLoader.withBaseRegistry(baseEntityRegistry).start(true); return baseEntityRegistry; } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java index 9501b03482d045..aecb4f0afb12cc 100644 --- 
a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java @@ -96,7 +96,7 @@ private static Map buildCustomizedProperties( } @Bean(name = "kafkaEventConsumer") - protected KafkaListenerContainerFactory createInstance( + protected KafkaListenerContainerFactory kafkaEventConsumer( @Qualifier("kafkaConsumerFactory") DefaultKafkaConsumerFactory kafkaConsumerFactory, @Qualifier("configurationProvider") ConfigurationProvider configurationProvider) { diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/BackfillBrowsePathsV2Step.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/BackfillBrowsePathsV2Step.java index 6743effd775b1b..668892bb46b7fe 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/BackfillBrowsePathsV2Step.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/BackfillBrowsePathsV2Step.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.boot.steps; import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.SystemMetadataUtils.createDefaultSystemMetadata; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; @@ -23,7 +24,6 @@ import com.linkedin.metadata.search.SearchService; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.mxe.MetadataChangeProposal; -import com.linkedin.mxe.SystemMetadata; import io.datahubproject.metadata.context.OperationContext; import java.util.Set; import javax.annotation.Nonnull; @@ -152,8 +152,7 @@ private void ingestBrowsePathsV2( proposal.setEntityType(urn.getEntityType()); proposal.setAspectName(Constants.BROWSE_PATHS_V2_ASPECT_NAME); proposal.setChangeType(ChangeType.UPSERT); - proposal.setSystemMetadata( - new 
SystemMetadata().setRunId(DEFAULT_RUN_ID).setLastObserved(System.currentTimeMillis())); + proposal.setSystemMetadata(createDefaultSystemMetadata()); proposal.setAspect(GenericRecordUtils.serializeAspect(browsePathsV2)); entityService.ingestProposal(systemOperationContext, proposal, auditStamp, false); } diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/UpgradeDefaultBrowsePathsStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/UpgradeDefaultBrowsePathsStep.java index 765ba2f44a54d1..89846476a9875e 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/UpgradeDefaultBrowsePathsStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/UpgradeDefaultBrowsePathsStep.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.boot.steps; import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.SystemMetadataUtils.createDefaultSystemMetadata; import com.google.common.collect.ImmutableSet; import com.linkedin.common.AuditStamp; @@ -17,7 +18,6 @@ import com.linkedin.metadata.search.utils.BrowsePathUtils; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.mxe.MetadataChangeProposal; -import com.linkedin.mxe.SystemMetadata; import io.datahubproject.metadata.context.OperationContext; import java.util.Set; import javax.annotation.Nonnull; @@ -138,8 +138,7 @@ private void migrateBrowsePath( proposal.setEntityType(urn.getEntityType()); proposal.setAspectName(Constants.BROWSE_PATHS_ASPECT_NAME); proposal.setChangeType(ChangeType.UPSERT); - proposal.setSystemMetadata( - new SystemMetadata().setRunId(DEFAULT_RUN_ID).setLastObserved(System.currentTimeMillis())); + proposal.setSystemMetadata(createDefaultSystemMetadata()); proposal.setAspect(GenericRecordUtils.serializeAspect(newPaths)); entityService.ingestProposal(opContext, proposal, auditStamp, false); } diff --git 
a/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImpl.java b/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImpl.java index 9e671bfb7e01e5..c5f87c3f1dced4 100644 --- a/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImpl.java +++ b/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImpl.java @@ -61,6 +61,7 @@ import io.datahubproject.openapi.v1.entities.EntitiesController; import jakarta.servlet.http.HttpServletRequest; import java.net.URISyntaxException; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Objects; @@ -628,12 +629,18 @@ public ResponseEntity scroll( authentication, true); - // TODO multi-field sort - SortCriterion sortCriterion = new SortCriterion(); - sortCriterion.setField(Optional.ofNullable(sort).map(s -> s.get(0)).orElse("urn")); - sortCriterion.setOrder( - com.linkedin.metadata.query.filter.SortOrder.valueOf( - Optional.ofNullable(sortOrder).map(Enum::name).orElse("ASCENDING"))); + List sortCriteria = + Optional.ofNullable(sort).orElse(Collections.singletonList("urn")).stream() + .map( + sortField -> { + SortCriterion sortCriterion = new SortCriterion(); + sortCriterion.setField(sortField); + sortCriterion.setOrder( + com.linkedin.metadata.query.filter.SortOrder.valueOf( + Optional.ofNullable(sortOrder).map(Enum::name).orElse("ASCENDING"))); + return sortCriterion; + }) + .collect(Collectors.toList()); ScrollResult result = _searchService.scrollAcrossEntities( @@ -641,7 +648,7 @@ public ResponseEntity scroll( List.of(entitySpec.getName()), query, null, - sortCriterion, + sortCriteria, scrollId, null, count); diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/AspectItem.java 
b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/AspectItem.java new file mode 100644 index 00000000000000..ec5dff7817231c --- /dev/null +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/AspectItem.java @@ -0,0 +1,15 @@ +package io.datahubproject.openapi.v3.models; + +import com.linkedin.common.AuditStamp; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.mxe.SystemMetadata; +import lombok.Builder; +import lombok.Value; + +@Builder(toBuilder = true) +@Value +public class AspectItem { + RecordTemplate aspect; + SystemMetadata systemMetadata; + AuditStamp auditStamp; +} diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericAspectV3.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericAspectV3.java index 4db2c3288d1547..70bf2182c29f47 100644 --- a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericAspectV3.java +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericAspectV3.java @@ -19,4 +19,5 @@ public class GenericAspectV3 implements GenericAspect { @Nonnull Map value; @Nullable Map systemMetadata; @Nullable Map headers; + @Nullable Map auditStamp; } diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericEntityV3.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericEntityV3.java index 3af3b25028fadc..54d6ac2c1736f4 100644 --- a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericEntityV3.java +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericEntityV3.java @@ -5,9 +5,6 @@ import com.fasterxml.jackson.core.type.TypeReference; import 
com.fasterxml.jackson.databind.ObjectMapper; import com.linkedin.common.urn.Urn; -import com.linkedin.data.template.RecordTemplate; -import com.linkedin.mxe.SystemMetadata; -import com.linkedin.util.Pair; import io.datahubproject.openapi.models.GenericEntity; import java.io.IOException; import java.nio.charset.StandardCharsets; @@ -42,9 +39,7 @@ public Map getAspects() { public static class GenericEntityV3Builder { public GenericEntityV3 build( - ObjectMapper objectMapper, - @Nonnull Urn urn, - Map> aspects) { + ObjectMapper objectMapper, @Nonnull Urn urn, Map aspects) { Map jsonObjectMap = aspects.entrySet().stream() .map( @@ -53,13 +48,18 @@ public GenericEntityV3 build( String aspectName = entry.getKey(); Map aspectValue = objectMapper.readValue( - RecordUtils.toJsonString(entry.getValue().getFirst()) + RecordUtils.toJsonString(entry.getValue().getAspect()) .getBytes(StandardCharsets.UTF_8), new TypeReference<>() {}); Map systemMetadata = - entry.getValue().getSecond() != null + entry.getValue().getSystemMetadata() != null ? objectMapper.convertValue( - entry.getValue().getSecond(), new TypeReference<>() {}) + entry.getValue().getSystemMetadata(), new TypeReference<>() {}) + : null; + Map auditStamp = + entry.getValue().getAuditStamp() != null + ? 
objectMapper.convertValue( + entry.getValue().getAuditStamp().data(), new TypeReference<>() {}) : null; return Map.entry( @@ -67,6 +67,7 @@ public GenericEntityV3 build( GenericAspectV3.builder() .value(aspectValue) .systemMetadata(systemMetadata) + .auditStamp(auditStamp) .build()); } catch (IOException ex) { throw new RuntimeException(ex); diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java index de5d2ae1118d4a..8d89417b292155 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java @@ -13,14 +13,11 @@ import com.datahub.authorization.AuthorizerChain; import com.datahub.util.RecordUtils; import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; -import com.linkedin.data.ByteString; import com.linkedin.data.template.RecordTemplate; import com.linkedin.entity.EnvelopedAspect; -import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.aspect.batch.ChangeMCP; @@ -41,7 +38,6 @@ import com.linkedin.metadata.search.SearchEntityArray; import com.linkedin.metadata.search.SearchService; import com.linkedin.metadata.utils.AuditStampUtils; -import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.metadata.utils.SearchUtil; import com.linkedin.mxe.SystemMetadata; import com.linkedin.util.Pair; @@ -57,7 +53,6 @@ import jakarta.servlet.http.HttpServletRequest; import 
java.lang.reflect.InvocationTargetException; import java.net.URISyntaxException; -import java.nio.charset.StandardCharsets; import java.util.*; import java.util.stream.Collectors; import javax.annotation.Nonnull; @@ -66,6 +61,7 @@ import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; +import org.springframework.util.CollectionUtils; import org.springframework.web.bind.annotation.DeleteMapping; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PatchMapping; @@ -171,6 +167,7 @@ public ResponseEntity getEntities( @RequestParam(value = "query", defaultValue = "*") String query, @RequestParam(value = "scrollId", required = false) String scrollId, @RequestParam(value = "sort", required = false, defaultValue = "urn") String sortField, + @RequestParam(value = "sortCriteria", required = false) List sortFields, @RequestParam(value = "sortOrder", required = false, defaultValue = "ASCENDING") String sortOrder, @RequestParam(value = "systemMetadata", required = false, defaultValue = "false") @@ -199,8 +196,15 @@ public ResponseEntity getEntities( authentication, true); - // TODO: support additional and multiple sort params - SortCriterion sortCriterion = SearchUtil.sortBy(sortField, SortOrder.valueOf(sortOrder)); + List sortCriteria; + if (!CollectionUtils.isEmpty(sortFields)) { + sortCriteria = new ArrayList<>(); + sortFields.forEach( + field -> sortCriteria.add(SearchUtil.sortBy(field, SortOrder.valueOf(sortOrder)))); + } else { + sortCriteria = + Collections.singletonList(SearchUtil.sortBy(sortField, SortOrder.valueOf(sortOrder))); + } ScrollResult result = searchService.scrollAcrossEntities( @@ -211,7 +215,7 @@ public ResponseEntity getEntities( List.of(entitySpec.getName()), query, null, - sortCriterion, + sortCriteria, scrollId, null, count); @@ -726,28 +730,14 @@ protected RecordTemplate toRecordTemplate( 
aspectSpec.getDataTemplateClass(), envelopedAspect.getValue().data()); } - protected ChangeMCP toUpsertItem( + protected abstract ChangeMCP toUpsertItem( @Nonnull AspectRetriever aspectRetriever, Urn entityUrn, AspectSpec aspectSpec, Boolean createIfNotExists, String jsonAspect, Actor actor) - throws JsonProcessingException { - JsonNode jsonNode = objectMapper.readTree(jsonAspect); - String aspectJson = jsonNode.get("value").toString(); - return ChangeItemImpl.builder() - .urn(entityUrn) - .aspectName(aspectSpec.getName()) - .changeType(Boolean.TRUE.equals(createIfNotExists) ? ChangeType.CREATE : ChangeType.UPSERT) - .auditStamp(AuditStampUtils.createAuditStamp(actor.toUrnStr())) - .recordTemplate( - GenericRecordUtils.deserializeAspect( - ByteString.copyString(aspectJson, StandardCharsets.UTF_8), - GenericRecordUtils.JSON, - aspectSpec)) - .build(aspectRetriever); - } + throws URISyntaxException, JsonProcessingException; protected ChangeMCP toUpsertItem( @Nonnull AspectRetriever aspectRetriever, diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java index ddbc8004081eb6..ea72bac73edf38 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java @@ -20,6 +20,7 @@ import com.linkedin.metadata.entity.restoreindices.RestoreIndicesResult; import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.systemmetadata.SystemMetadataService; import com.linkedin.metadata.timeseries.TimeseriesAspectService; @@ 
-214,6 +215,13 @@ public ResponseEntity explainSearchQuery( @RequestParam(value = "filters", required = false) @Nullable String filters, + @Parameter( + name = "sortCriteria", + required = false, + description = "Criteria to sort results on.") + @RequestParam("sortCriteria") + @Nullable + List sortCriteria, @Parameter(name = "searchFlags", description = "Optional configuration flags.") @RequestParam(value = "searchFlags", required = false) @Nullable @@ -253,7 +261,7 @@ public ResponseEntity explainSearchQuery( encodeValue(documentId), entityName, filters == null ? null : objectMapper.readValue(filters, Filter.class), - null, + sortCriteria, scrollId, keepAlive, size, @@ -315,6 +323,13 @@ public ResponseEntity explainSearchQueryDiff( @RequestParam(value = "filters", required = false) @Nullable String filters, + @Parameter( + name = "sortCriteria", + required = false, + description = "Criteria to sort results on.") + @RequestParam("sortCriteria") + @Nullable + List sortCriteria, @Parameter(name = "searchFlags", description = "Optional configuration flags.") @RequestParam(value = "searchFlags", required = false) @Nullable @@ -354,7 +369,7 @@ public ResponseEntity explainSearchQueryDiff( encodeValue(documentIdA), entityName, filters == null ? null : objectMapper.readValue(filters, Filter.class), - null, + sortCriteria, scrollId, keepAlive, size, @@ -367,7 +382,7 @@ public ResponseEntity explainSearchQueryDiff( encodeValue(documentIdB), entityName, filters == null ? 
null : objectMapper.readValue(filters, Filter.class), - null, + sortCriteria, scrollId, keepAlive, size, diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java index 54a7724cadd345..1207eb331b795e 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java @@ -13,8 +13,11 @@ import com.linkedin.data.ByteString; import com.linkedin.data.template.RecordTemplate; import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.entity.EntityApiUtils; import com.linkedin.metadata.entity.IngestResult; import com.linkedin.metadata.entity.UpdateAspectResult; @@ -260,4 +263,26 @@ protected List buildEntityList( } return responseList; } + + @Override + protected ChangeMCP toUpsertItem( + @Nonnull AspectRetriever aspectRetriever, + Urn entityUrn, + AspectSpec aspectSpec, + Boolean createIfNotExists, + String jsonAspect, + Actor actor) + throws URISyntaxException { + return ChangeItemImpl.builder() + .urn(entityUrn) + .aspectName(aspectSpec.getName()) + .changeType(Boolean.TRUE.equals(createIfNotExists) ? 
ChangeType.CREATE : ChangeType.UPSERT) + .auditStamp(AuditStampUtils.createAuditStamp(actor.toUrnStr())) + .recordTemplate( + GenericRecordUtils.deserializeAspect( + ByteString.copyString(jsonAspect, StandardCharsets.UTF_8), + GenericRecordUtils.JSON, + aspectSpec)) + .build(aspectRetriever); + } } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/TimeseriesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/TimeseriesController.java index bb10719bacd3fa..4e8c0abcb0c227 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/TimeseriesController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/TimeseriesController.java @@ -90,7 +90,7 @@ public ResponseEntity> getAspects( throw new IllegalArgumentException("Only timeseries aspects are supported."); } - List sortCriterion = + List sortCriteria = List.of( SearchUtil.sortBy("timestampMillis", SortOrder.DESCENDING), SearchUtil.sortBy("messageId", SortOrder.DESCENDING)); @@ -101,7 +101,7 @@ public ResponseEntity> getAspects( entityName, aspectName, null, - sortCriterion, + sortCriteria, scrollId, count, startTimeMillis, diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java index f26ad6821c5833..f6f248be77c670 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java @@ -40,7 +40,7 @@ public class OpenAPIV3Generator { private static final String NAME_QUERY = "query"; private static final String NAME_PATH = "path"; private static final String NAME_SYSTEM_METADATA = "systemMetadata"; - private static final String 
NAME_ASYNC = "async"; + private static final String NAME_AUDIT_STAMP = "auditStamp"; private static final String NAME_VERSION = "version"; private static final String NAME_SCROLL_ID = "scrollId"; private static final String NAME_INCLUDE_SOFT_DELETE = "includeSoftDelete"; @@ -77,9 +77,6 @@ public static OpenAPI generateOpenApiSpec(EntityRegistry entityRegistry) { // Components final Components components = new Components(); // --> Aspect components - // TODO: Correct handling of SystemMetadata and SortOrder - components.addSchemas( - "SystemMetadata", new Schema().type(TYPE_OBJECT).additionalProperties(true)); components.addSchemas("SortOrder", new Schema()._enum(List.of("ASCENDING", "DESCENDING"))); components.addSchemas("AspectPatch", buildAspectPatchSchema()); components.addSchemas( @@ -167,6 +164,10 @@ public static OpenAPI generateOpenApiSpec(EntityRegistry entityRegistry) { buildSingleEntityAspectPath( e, a.getName(), a.getPegasusSchema().getName()))); }); + // TODO: Correct handling of SystemMetadata and AuditStamp + components.addSchemas( + "SystemMetadata", new Schema().type(TYPE_OBJECT).additionalProperties(true)); + components.addSchemas("AuditStamp", new Schema().type(TYPE_OBJECT).additionalProperties(true)); return new OpenAPI().openapi("3.0.1").info(info).paths(paths).components(components); } @@ -185,7 +186,7 @@ private static PathItem buildSingleEntityPath(final EntitySpec entity) { .schema(new Schema().type(TYPE_STRING)), new Parameter() .in(NAME_QUERY) - .name("systemMetadata") + .name(NAME_SYSTEM_METADATA) .description("Include systemMetadata with response.") .schema(new Schema().type(TYPE_BOOLEAN)._default(false)), new Parameter() @@ -424,7 +425,7 @@ private static PathItem buildBatchGetEntityPath(final EntitySpec entity) { List.of( new Parameter() .in(NAME_QUERY) - .name("systemMetadata") + .name(NAME_SYSTEM_METADATA) .description("Include systemMetadata with response.") .schema(new Schema().type(TYPE_BOOLEAN)._default(false)))) .requestBody( @@ 
-575,12 +576,19 @@ private static Schema buildAspectRefResponseSchema(final String aspectName) { .required(List.of(PROPERTY_VALUE)) .addProperty(PROPERTY_VALUE, new Schema<>().$ref(PATH_DEFINITIONS + aspectName)); result.addProperty( - "systemMetadata", + NAME_SYSTEM_METADATA, new Schema<>() .type(TYPE_OBJECT) .anyOf(List.of(new Schema().$ref(PATH_DEFINITIONS + "SystemMetadata"))) .description("System metadata for the aspect.") .nullable(true)); + result.addProperty( + NAME_AUDIT_STAMP, + new Schema<>() + .type(TYPE_OBJECT) + .anyOf(List.of(new Schema().$ref(PATH_DEFINITIONS + "AuditStamp"))) + .description("Audit stamp for the aspect.") + .nullable(true)); return result; } @@ -592,7 +600,7 @@ private static Schema buildAspectRefRequestSchema(final String aspectName) { .required(List.of(PROPERTY_VALUE)) .addProperty(PROPERTY_VALUE, new Schema<>().$ref(PATH_DEFINITIONS + aspectName)); result.addProperty( - "systemMetadata", + NAME_SYSTEM_METADATA, new Schema<>() .type(TYPE_OBJECT) .anyOf(List.of(new Schema().$ref(PATH_DEFINITIONS + "SystemMetadata"))) @@ -867,7 +875,7 @@ private static PathItem buildSingleEntityAspectPath( List.of( new Parameter() .in(NAME_QUERY) - .name("systemMetadata") + .name(NAME_SYSTEM_METADATA) .description("Include systemMetadata with response.") .schema(new Schema().type(TYPE_BOOLEAN)._default(false)))) .summary(String.format("Patch aspect %s on %s ", aspect, upperFirstEntity)) diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java index 9ca34934e4c657..fbc9bf2956cfd3 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java @@ -13,10 +13,12 @@ import 
com.fasterxml.jackson.databind.node.ObjectNode; import com.linkedin.common.urn.Urn; import com.linkedin.data.ByteString; -import com.linkedin.data.template.RecordTemplate; import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.entity.EntityApiUtils; import com.linkedin.metadata.entity.IngestResult; import com.linkedin.metadata.entity.UpdateAspectResult; @@ -28,12 +30,12 @@ import com.linkedin.metadata.utils.AuditStampUtils; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.mxe.SystemMetadata; -import com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.RequestContext; import io.datahubproject.openapi.controller.GenericEntitiesController; import io.datahubproject.openapi.exception.InvalidUrnException; import io.datahubproject.openapi.exception.UnauthorizedException; +import io.datahubproject.openapi.v3.models.AspectItem; import io.datahubproject.openapi.v3.models.GenericAspectV3; import io.datahubproject.openapi.v3.models.GenericEntityScrollResultV3; import io.datahubproject.openapi.v3.models.GenericEntityV3; @@ -143,11 +145,27 @@ protected List buildEntityVersionedAspectList( .map( u -> GenericEntityV3.builder() - .build(objectMapper, u, toAspectMap(u, aspects.get(u), withSystemMetadata))) + .build( + objectMapper, u, toAspectItemMap(u, aspects.get(u), withSystemMetadata))) .collect(Collectors.toList()); } } + private Map toAspectItemMap( + Urn urn, List aspects, boolean withSystemMetadata) { + return aspects.stream() + .map( + a -> + Map.entry( + a.getName(), + AspectItem.builder() + .aspect(toRecordTemplate(lookupAspectSpec(urn, a.getName()), a)) + .systemMetadata(withSystemMetadata ? 
a.getSystemMetadata() : null) + .auditStamp(withSystemMetadata ? a.getCreated() : null) + .build())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + } + @Override protected List buildEntityList( Set ingestResults, boolean withSystemMetadata) { @@ -156,15 +174,21 @@ protected List buildEntityList( Map> entityMap = ingestResults.stream().collect(Collectors.groupingBy(IngestResult::getUrn)); for (Map.Entry> urnAspects : entityMap.entrySet()) { - Map> aspectsMap = + Map aspectsMap = urnAspects.getValue().stream() .map( ingest -> Map.entry( ingest.getRequest().getAspectName(), - Pair.of( - ingest.getRequest().getRecordTemplate(), - withSystemMetadata ? ingest.getRequest().getSystemMetadata() : null))) + AspectItem.builder() + .aspect(ingest.getRequest().getRecordTemplate()) + .systemMetadata( + withSystemMetadata + ? ingest.getRequest().getSystemMetadata() + : null) + .auditStamp( + withSystemMetadata ? ingest.getRequest().getAuditStamp() : null) + .build())) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); responseList.add( GenericEntityV3.builder().build(objectMapper, urnAspects.getKey(), aspectsMap)); @@ -183,9 +207,12 @@ protected GenericEntityV3 buildGenericEntity( updateAspectResult.getUrn(), Map.of( aspectName, - Pair.of( - updateAspectResult.getNewValue(), - withSystemMetadata ? updateAspectResult.getNewSystemMetadata() : null))); + AspectItem.builder() + .aspect(updateAspectResult.getNewValue()) + .systemMetadata( + withSystemMetadata ? updateAspectResult.getNewSystemMetadata() : null) + .auditStamp(withSystemMetadata ? 
updateAspectResult.getAuditStamp() : null) + .build())); } private List toRecordTemplates( @@ -324,4 +351,28 @@ protected AspectsBatch toMCPBatch( .retrieverContext(opContext.getRetrieverContext().get()) .build(); } + + @Override + protected ChangeMCP toUpsertItem( + @Nonnull AspectRetriever aspectRetriever, + Urn entityUrn, + AspectSpec aspectSpec, + Boolean createIfNotExists, + String jsonAspect, + Actor actor) + throws JsonProcessingException { + JsonNode jsonNode = objectMapper.readTree(jsonAspect); + String aspectJson = jsonNode.get("value").toString(); + return ChangeItemImpl.builder() + .urn(entityUrn) + .aspectName(aspectSpec.getName()) + .changeType(Boolean.TRUE.equals(createIfNotExists) ? ChangeType.CREATE : ChangeType.UPSERT) + .auditStamp(AuditStampUtils.createAuditStamp(actor.toUrnStr())) + .recordTemplate( + GenericRecordUtils.deserializeAspect( + ByteString.copyString(aspectJson, StandardCharsets.UTF_8), + GenericRecordUtils.JSON, + aspectSpec)) + .build(aspectRetriever); + } } diff --git a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java index 3c7e93621f5cce..60425fc7e756ed 100644 --- a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java +++ b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java @@ -37,6 +37,7 @@ import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.openapi.config.SpringWebConfig; import io.datahubproject.test.metadata.context.TestOperationContexts; +import java.util.Collections; import java.util.List; import java.util.Map; import org.springframework.beans.factory.annotation.Autowired; @@ -95,7 +96,7 @@ public void testSearchOrderPreserved() throws Exception { eq(List.of("dataset")), anyString(), 
nullable(Filter.class), - eq(SearchUtil.sortBy("urn", SortOrder.valueOf("ASCENDING"))), + eq(Collections.singletonList(SearchUtil.sortBy("urn", SortOrder.valueOf("ASCENDING")))), nullable(String.class), nullable(String.class), anyInt())) @@ -113,7 +114,9 @@ public void testSearchOrderPreserved() throws Exception { eq(List.of("dataset")), anyString(), nullable(Filter.class), - eq(SearchUtil.sortBy("urn", SortOrder.valueOf("DESCENDING"))), + eq( + Collections.singletonList( + SearchUtil.sortBy("urn", SortOrder.valueOf("DESCENDING")))), nullable(String.class), nullable(String.class), anyInt())) diff --git a/metadata-service/restli-api/src/main/idl/com.linkedin.entity.entities.restspec.json b/metadata-service/restli-api/src/main/idl/com.linkedin.entity.entities.restspec.json index fe53b43ccd1da8..935a4293839281 100644 --- a/metadata-service/restli-api/src/main/idl/com.linkedin.entity.entities.restspec.json +++ b/metadata-service/restli-api/src/main/idl/com.linkedin.entity.entities.restspec.json @@ -199,6 +199,10 @@ "name" : "sort", "type" : "com.linkedin.metadata.query.filter.SortCriterion", "optional" : true + }, { + "name" : "sortCriteria", + "type" : "{ \"type\" : \"array\", \"items\" : \"com.linkedin.metadata.query.filter.SortCriterion\" }", + "optional" : true }, { "name" : "start", "type" : "int" @@ -248,6 +252,10 @@ "name" : "sort", "type" : "com.linkedin.metadata.query.filter.SortCriterion", "optional" : true + }, { + "name" : "sortCriteria", + "type" : "{ \"type\" : \"array\", \"items\" : \"com.linkedin.metadata.query.filter.SortCriterion\" }", + "optional" : true }, { "name" : "start", "type" : "int" @@ -288,6 +296,10 @@ "name" : "sort", "type" : "com.linkedin.metadata.query.filter.SortCriterion", "optional" : true + }, { + "name" : "sortCriteria", + "type" : "{ \"type\" : \"array\", \"items\" : \"com.linkedin.metadata.query.filter.SortCriterion\" }", + "optional" : true }, { "name" : "scrollId", "type" : "string", @@ -333,6 +345,10 @@ "name" : "sort", "type" 
: "com.linkedin.metadata.query.filter.SortCriterion", "optional" : true + }, { + "name" : "sortCriteria", + "type" : "{ \"type\" : \"array\", \"items\" : \"com.linkedin.metadata.query.filter.SortCriterion\" }", + "optional" : true }, { "name" : "scrollId", "type" : "string", @@ -374,6 +390,10 @@ "name" : "sort", "type" : "com.linkedin.metadata.query.filter.SortCriterion", "optional" : true + }, { + "name" : "sortCriteria", + "type" : "{ \"type\" : \"array\", \"items\" : \"com.linkedin.metadata.query.filter.SortCriterion\" }", + "optional" : true }, { "name" : "start", "type" : "int" @@ -411,6 +431,10 @@ "name" : "sort", "type" : "com.linkedin.metadata.query.filter.SortCriterion", "optional" : true + }, { + "name" : "sortCriteria", + "type" : "{ \"type\" : \"array\", \"items\" : \"com.linkedin.metadata.query.filter.SortCriterion\" }", + "optional" : true }, { "name" : "start", "type" : "int" @@ -452,6 +476,10 @@ "name" : "sort", "type" : "com.linkedin.metadata.query.filter.SortCriterion", "optional" : true + }, { + "name" : "sortCriteria", + "type" : "{ \"type\" : \"array\", \"items\" : \"com.linkedin.metadata.query.filter.SortCriterion\" }", + "optional" : true }, { "name" : "start", "type" : "int" diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index 305b3de1563503..5b6f7a290fd1a8 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -2328,6 +2328,15 @@ "fieldName" : "editedDescription", "fieldType" : "TEXT" } + }, { + "name" : "name", + "type" : "string", + "doc" : "Editable display name of the Dataset", + "optional" : true, + "Searchable" : { + "fieldName" : "editedName", + "fieldType" : "TEXT_PARTIAL" + } } ], "Aspect" : { "name" : "editableDatasetProperties" @@ -6811,6 
+6820,10 @@ "name" : "sort", "type" : "com.linkedin.metadata.query.filter.SortCriterion", "optional" : true + }, { + "name" : "sortCriteria", + "type" : "{ \"type\" : \"array\", \"items\" : \"com.linkedin.metadata.query.filter.SortCriterion\" }", + "optional" : true }, { "name" : "start", "type" : "int" @@ -6860,6 +6873,10 @@ "name" : "sort", "type" : "com.linkedin.metadata.query.filter.SortCriterion", "optional" : true + }, { + "name" : "sortCriteria", + "type" : "{ \"type\" : \"array\", \"items\" : \"com.linkedin.metadata.query.filter.SortCriterion\" }", + "optional" : true }, { "name" : "start", "type" : "int" @@ -6900,6 +6917,10 @@ "name" : "sort", "type" : "com.linkedin.metadata.query.filter.SortCriterion", "optional" : true + }, { + "name" : "sortCriteria", + "type" : "{ \"type\" : \"array\", \"items\" : \"com.linkedin.metadata.query.filter.SortCriterion\" }", + "optional" : true }, { "name" : "scrollId", "type" : "string", @@ -6945,6 +6966,10 @@ "name" : "sort", "type" : "com.linkedin.metadata.query.filter.SortCriterion", "optional" : true + }, { + "name" : "sortCriteria", + "type" : "{ \"type\" : \"array\", \"items\" : \"com.linkedin.metadata.query.filter.SortCriterion\" }", + "optional" : true }, { "name" : "scrollId", "type" : "string", @@ -6986,6 +7011,10 @@ "name" : "sort", "type" : "com.linkedin.metadata.query.filter.SortCriterion", "optional" : true + }, { + "name" : "sortCriteria", + "type" : "{ \"type\" : \"array\", \"items\" : \"com.linkedin.metadata.query.filter.SortCriterion\" }", + "optional" : true }, { "name" : "start", "type" : "int" @@ -7023,6 +7052,10 @@ "name" : "sort", "type" : "com.linkedin.metadata.query.filter.SortCriterion", "optional" : true + }, { + "name" : "sortCriteria", + "type" : "{ \"type\" : \"array\", \"items\" : \"com.linkedin.metadata.query.filter.SortCriterion\" }", + "optional" : true }, { "name" : "start", "type" : "int" @@ -7064,6 +7097,10 @@ "name" : "sort", "type" : "com.linkedin.metadata.query.filter.SortCriterion", 
"optional" : true + }, { + "name" : "sortCriteria", + "type" : "{ \"type\" : \"array\", \"items\" : \"com.linkedin.metadata.query.filter.SortCriterion\" }", + "optional" : true }, { "name" : "start", "type" : "int" diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json index cf43490cbdd6d7..3f64d1b9480354 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json @@ -2328,6 +2328,15 @@ "fieldName" : "editedDescription", "fieldType" : "TEXT" } + }, { + "name" : "name", + "type" : "string", + "doc" : "Editable display name of the Dataset", + "optional" : true, + "Searchable" : { + "fieldName" : "editedName", + "fieldType" : "TEXT_PARTIAL" + } } ], "Aspect" : { "name" : "editableDatasetProperties" diff --git a/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClient.java b/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClient.java index 8821143cde6cc3..5f086e79a387a8 100644 --- a/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClient.java +++ b/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClient.java @@ -38,7 +38,6 @@ import java.util.Map; import java.util.Optional; import java.util.Set; -import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -236,7 +235,7 @@ ListResult list( * * @param input search query * @param filter search filters - * @param sortCriterion sort criterion + * @param sortCriteria sort criteria * @param start start offset for search results * @param count max number of search results requested * @return Snapshot key @@ -247,7 +246,7 @@ SearchResult search( @Nonnull String entity, 
@Nonnull String input, @Nullable Filter filter, - SortCriterion sortCriterion, + List sortCriteria, int start, int count) throws RemoteInvocationException; @@ -271,7 +270,7 @@ SearchResult searchAcrossEntities( @Nullable Filter filter, int start, int count, - @Nullable SortCriterion sortCriterion) + List sortCriteria) throws RemoteInvocationException; /** @@ -293,7 +292,7 @@ SearchResult searchAcrossEntities( @Nullable Filter filter, int start, int count, - @Nullable SortCriterion sortCriterion, + List sortCriteria, List facets) throws RemoteInvocationException; @@ -329,7 +328,7 @@ ScrollResult scrollAcrossEntities( * @param input the search input text * @param maxHops the max number of hops away to search for. If null, searches all hops. * @param filter the request map with fields and values as filters to be applied to search hits - * @param sortCriterion {@link SortCriterion} to be applied to search results + * @param sortCriteria list of {@link SortCriterion} to be applied to search results * @param start index to start the search from * @param count the number of search hits to return * @return a {@link SearchResult} that contains a list of matched documents and related search @@ -343,7 +342,7 @@ LineageSearchResult searchAcrossLineage( @Nonnull String input, @Nullable Integer maxHops, @Nullable Filter filter, - @Nullable SortCriterion sortCriterion, + List sortCriteria, int start, int count) throws RemoteInvocationException; @@ -357,7 +356,7 @@ LineageSearchResult searchAcrossLineage( * @param input the search input text * @param maxHops the max number of hops away to search for. If null, searches all hops. 
* @param filter the request map with fields and values as filters to be applied to search hits - * @param sortCriterion {@link SortCriterion} to be applied to search results + * @param sortCriteria list of {@link SortCriterion} to be applied to search results * @param scrollId opaque scroll ID indicating offset * @param keepAlive string representation of time to keep point in time alive, ex: 5m * @param count the number of search hits to return of roundtrips for UI visualizations. @@ -373,7 +372,7 @@ LineageScrollResult scrollAcrossLineage( @Nonnull String input, @Nullable Integer maxHops, @Nullable Filter filter, - @Nullable SortCriterion sortCriterion, + List sortCriteria, @Nullable String scrollId, @Nonnull String keepAlive, int count) @@ -427,7 +426,7 @@ void deleteEntityReferences(@Nonnull OperationContext opContext, @Nonnull final * * @param entity filter entity * @param filter search filters - * @param sortCriterion sort criterion + * @param sortCriteria sort criteria * @param start start offset for search results * @param count max number of search results requested * @return a set of {@link SearchResult}s @@ -437,7 +436,7 @@ SearchResult filter( @Nonnull OperationContext opContext, @Nonnull String entity, @Nonnull Filter filter, - @Nullable SortCriterion sortCriterion, + List sortCriteria, int start, int count) throws RemoteInvocationException; @@ -519,27 +518,17 @@ default String ingestProposal( return ingestProposal(opContext, metadataChangeProposal, false); } - String ingestProposal( + /** + * Ingest a MetadataChangeProposal event. 
+ * + * @return the urn string ingested + */ + default String ingestProposal( @Nonnull OperationContext opContext, @Nonnull final MetadataChangeProposal metadataChangeProposal, final boolean async) - throws RemoteInvocationException; - - @Deprecated - default String wrappedIngestProposal( - @Nonnull OperationContext opContext, @Nonnull MetadataChangeProposal metadataChangeProposal) { - return wrappedIngestProposal(opContext, metadataChangeProposal, false); - } - - default String wrappedIngestProposal( - @Nonnull OperationContext opContext, - @Nonnull MetadataChangeProposal metadataChangeProposal, - final boolean async) { - try { - return ingestProposal(opContext, metadataChangeProposal, async); - } catch (RemoteInvocationException e) { - throw new RuntimeException(e); - } + throws RemoteInvocationException { + return batchIngestProposals(opContext, List.of(metadataChangeProposal), async).get(0); } @Deprecated @@ -550,15 +539,20 @@ default List batchIngestProposals( return batchIngestProposals(opContext, metadataChangeProposals, false); } - default List batchIngestProposals( + /** + * Ingest a list of proposals in a batch. 
+ * + * @param opContext operation context + * @param metadataChangeProposals list of proposals + * @param async async or sync ingestion path + * @return ingested urns + */ + @Nonnull + List batchIngestProposals( @Nonnull OperationContext opContext, @Nonnull final Collection metadataChangeProposals, final boolean async) - throws RemoteInvocationException { - return metadataChangeProposals.stream() - .map(proposal -> wrappedIngestProposal(opContext, proposal, async)) - .collect(Collectors.toList()); - } + throws RemoteInvocationException; @Deprecated Optional getVersionedAspect( diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java index fe1ca571efea52..bc5b9e439d293e 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java @@ -12,7 +12,7 @@ import com.linkedin.data.template.RecordTemplate; import com.linkedin.data.template.StringArray; import com.linkedin.entity.AspectsDoGetTimeseriesAspectValuesRequestBuilder; -import com.linkedin.entity.AspectsDoIngestProposalRequestBuilder; +import com.linkedin.entity.AspectsDoIngestProposalBatchRequestBuilder; import com.linkedin.entity.AspectsGetRequestBuilder; import com.linkedin.entity.AspectsRequestBuilders; import com.linkedin.entity.EntitiesBatchGetRequestBuilder; @@ -50,6 +50,7 @@ import com.linkedin.metadata.browse.BrowseResult; import com.linkedin.metadata.browse.BrowseResultV2; import com.linkedin.metadata.graph.LineageDirection; +import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.query.AutoCompleteResult; import com.linkedin.metadata.query.LineageFlags; import com.linkedin.metadata.query.ListResult; @@ -62,11 +63,14 @@ import com.linkedin.metadata.query.filter.CriterionArray; import 
com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; +import com.linkedin.metadata.query.filter.SortCriterionArray; import com.linkedin.metadata.search.LineageScrollResult; import com.linkedin.metadata.search.LineageSearchResult; import com.linkedin.metadata.search.ScrollResult; import com.linkedin.metadata.search.SearchResult; +import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.mxe.MetadataChangeProposalArray; import com.linkedin.mxe.PlatformEvent; import com.linkedin.mxe.SystemMetadata; import com.linkedin.parseq.retry.backoff.BackoffPolicy; @@ -98,6 +102,7 @@ import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.NotImplementedException; +import org.opensearch.core.common.util.CollectionUtils; @Slf4j public class RestliEntityClient extends BaseClient implements EntityClient { @@ -589,7 +594,7 @@ public ListResult list( * * @param input search query * @param filter search filters - * @param sortCriterion sort criterion + * @param sortCriteria sort criteria * @param start start offset for search results * @param count max number of search results requested * @return Snapshot key @@ -602,7 +607,7 @@ public SearchResult search( @Nonnull String entity, @Nonnull String input, @Nullable Filter filter, - SortCriterion sortCriterion, + List sortCriteria, int start, int count) throws RemoteInvocationException { @@ -620,8 +625,9 @@ public SearchResult search( requestBuilder.filterParam(filter); } - if (sortCriterion != null) { - requestBuilder.sortParam(sortCriterion); + if (!CollectionUtils.isEmpty(sortCriteria)) { + requestBuilder.sortParam(sortCriteria.get(0)); + requestBuilder.sortCriteriaParam(new SortCriterionArray(sortCriteria)); } if (searchFlags != null) { @@ -643,10 +649,10 @@ public SearchResult searchAcrossEntities( @Nullable Filter filter, int start, int count, - @Nullable SortCriterion sortCriterion) + List 
sortCriteria) throws RemoteInvocationException { return searchAcrossEntities( - opContext, entities, input, filter, start, count, sortCriterion, null); + opContext, entities, input, filter, start, count, sortCriteria, null); } /** @@ -670,7 +676,7 @@ public SearchResult searchAcrossEntities( @Nullable Filter filter, int start, int count, - @Nullable SortCriterion sortCriterion, + List sortCriteria, @Nullable List facets) throws RemoteInvocationException { @@ -692,8 +698,9 @@ public SearchResult searchAcrossEntities( requestBuilder.searchFlagsParam(searchFlags); } - if (sortCriterion != null) { - requestBuilder.sortParam(sortCriterion); + if (!CollectionUtils.isEmpty(sortCriteria)) { + requestBuilder.sortParam(sortCriteria.get(0)); + requestBuilder.sortCriteriaParam(new SortCriterionArray(sortCriteria)); } return sendClientRequest(requestBuilder, opContext.getAuthentication()).getEntity(); @@ -743,7 +750,7 @@ public LineageSearchResult searchAcrossLineage( @Nonnull String input, @Nullable Integer maxHops, @Nullable Filter filter, - @Nullable SortCriterion sortCriterion, + List sortCriteria, int start, int count) throws RemoteInvocationException { @@ -770,6 +777,12 @@ public LineageSearchResult searchAcrossLineage( if (lineageFlags.getEndTimeMillis() != null) { requestBuilder.endTimeMillisParam(lineageFlags.getEndTimeMillis()); } + + if (!CollectionUtils.isEmpty(sortCriteria)) { + requestBuilder.sortParam(sortCriteria.get(0)); + requestBuilder.sortCriteriaParam(new SortCriterionArray(sortCriteria)); + } + requestBuilder.searchFlagsParam(opContext.getSearchContext().getSearchFlags()); return sendClientRequest(requestBuilder, opContext.getAuthentication()).getEntity(); @@ -785,7 +798,7 @@ public LineageScrollResult scrollAcrossLineage( @Nonnull String input, @Nullable Integer maxHops, @Nullable Filter filter, - @Nullable SortCriterion sortCriterion, + List sortCriteria, @Nullable String scrollId, @Nonnull String keepAlive, int count) @@ -815,6 +828,12 @@ public 
LineageScrollResult scrollAcrossLineage( if (lineageFlags.getEndTimeMillis() != null) { requestBuilder.endTimeMillisParam(lineageFlags.getEndTimeMillis()); } + + if (!CollectionUtils.isEmpty(sortCriteria)) { + requestBuilder.sortParam(sortCriteria.get(0)); + requestBuilder.sortCriteriaParam(new SortCriterionArray(sortCriteria)); + } + requestBuilder.searchFlagsParam(opContext.getSearchContext().getSearchFlags()); return sendClientRequest(requestBuilder, opContext.getAuthentication()).getEntity(); @@ -903,7 +922,7 @@ public SearchResult filter( @Nonnull OperationContext opContext, @Nonnull String entity, @Nonnull Filter filter, - @Nullable SortCriterion sortCriterion, + List sortCriteria, int start, int count) throws RemoteInvocationException { @@ -914,8 +933,9 @@ public SearchResult filter( .filterParam(filter) .startParam(start) .countParam(count); - if (sortCriterion != null) { - requestBuilder.sortParam(sortCriterion); + if (!CollectionUtils.isEmpty(sortCriteria)) { + requestBuilder.sortParam(sortCriteria.get(0)); + requestBuilder.sortCriteriaParam(new SortCriterionArray(sortCriteria)); } return sendClientRequest(requestBuilder, opContext.getAuthentication()).getEntity(); } @@ -1047,23 +1067,36 @@ public List getTimeseriesAspectValues( .getValues(); } - /** - * Ingest a MetadataChangeProposal event. 
- * - * @return the urn string ingested - */ + @Nonnull @Override - public String ingestProposal( + public List batchIngestProposals( @Nonnull OperationContext opContext, - @Nonnull final MetadataChangeProposal metadataChangeProposal, - final boolean async) + @Nonnull Collection metadataChangeProposals, + boolean async) throws RemoteInvocationException { - final AspectsDoIngestProposalRequestBuilder requestBuilder = + final AspectsDoIngestProposalBatchRequestBuilder requestBuilder = ASPECTS_REQUEST_BUILDERS - .actionIngestProposal() - .proposalParam(metadataChangeProposal) + .actionIngestProposalBatch() + .proposalsParam(new MetadataChangeProposalArray(metadataChangeProposals)) .asyncParam(String.valueOf(async)); - return sendClientRequest(requestBuilder, opContext.getSessionAuthentication()).getEntity(); + String result = + sendClientRequest(requestBuilder, opContext.getSessionAuthentication()).getEntity(); + return metadataChangeProposals.stream() + .map( + proposal -> { + if ("success".equals(result)) { + if (proposal.getEntityUrn() != null) { + return proposal.getEntityUrn().toString(); + } else { + EntitySpec entitySpec = + opContext.getEntityRegistry().getEntitySpec(proposal.getEntityType()); + return EntityKeyUtils.getUrnFromProposal(proposal, entitySpec.getKeyAspectSpec()) + .toString(); + } + } + return null; + }) + .collect(Collectors.toList()); } @Override diff --git a/metadata-service/restli-servlet-impl/build.gradle b/metadata-service/restli-servlet-impl/build.gradle index c1484f00efe595..6b68abfe7fb15c 100644 --- a/metadata-service/restli-servlet-impl/build.gradle +++ b/metadata-service/restli-servlet-impl/build.gradle @@ -106,3 +106,6 @@ pegasus.main.idlOptions.addIdlItem([ ]) ext.apiProject = project(':metadata-service:restli-api') + +spotlessJava.dependsOn generateTestDataTemplate +spotlessJava.dependsOn generateIntegTestDataTemplate diff --git 
a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java index e79dda34256822..8a5473da95ba2a 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java @@ -13,6 +13,7 @@ import static com.linkedin.metadata.resources.restli.RestliConstants.*; import static com.linkedin.metadata.search.utils.SearchUtils.*; import static com.linkedin.metadata.utils.PegasusUtils.*; +import static com.linkedin.metadata.utils.SystemMetadataUtils.generateSystemMetadataIfEmpty; import com.codahale.metrics.MetricRegistry; import com.datahub.authentication.Authentication; @@ -20,6 +21,7 @@ import com.datahub.authorization.AuthUtil; import com.datahub.authorization.EntitySpec; +import com.linkedin.metadata.utils.SystemMetadataUtils; import io.datahubproject.metadata.context.RequestContext; import io.datahubproject.metadata.services.RestrictedService; import com.linkedin.data.template.SetMode; @@ -257,19 +259,6 @@ public Task> batchGet( MetricRegistry.name(this.getClass(), "batchGet")); } - private SystemMetadata populateDefaultFieldsIfEmpty(@Nullable SystemMetadata systemMetadata) { - SystemMetadata result = systemMetadata; - if (result == null) { - result = new SystemMetadata(); - } - - if (result.getLastObserved() == 0) { - result.setLastObserved(System.currentTimeMillis()); - } - - return result; - } - @Action(name = ACTION_INGEST) @Nonnull @WithSpan @@ -297,7 +286,7 @@ public Task ingest( throw new RestLiServiceException(HttpStatus.S_422_UNPROCESSABLE_ENTITY, e); } - SystemMetadata systemMetadata = populateDefaultFieldsIfEmpty(providedSystemMetadata); + SystemMetadata systemMetadata = generateSystemMetadataIfEmpty(providedSystemMetadata); final 
AuditStamp auditStamp = new AuditStamp().setTime(_clock.millis()).setActor(Urn.createFromString(actorUrnStr)); @@ -358,7 +347,7 @@ public Task batchIngest( final List finalSystemMetadataList = Arrays.stream(systemMetadataList) - .map(systemMetadata -> populateDefaultFieldsIfEmpty(systemMetadata)) + .map(SystemMetadataUtils::generateSystemMetadataIfEmpty) .collect(Collectors.toList()); return RestliUtil.toTask( @@ -378,6 +367,7 @@ public Task search( @ActionParam(PARAM_INPUT) @Nonnull String input, @ActionParam(PARAM_FILTER) @Optional @Nullable Filter filter, @ActionParam(PARAM_SORT) @Optional @Nullable SortCriterion sortCriterion, + @ActionParam(PARAM_SORT_CRITERIA) @Optional @Nullable SortCriterion[] sortCriteria, @ActionParam(PARAM_START) int start, @ActionParam(PARAM_COUNT) int count, @Optional @Deprecated @Nullable @ActionParam(PARAM_FULLTEXT) Boolean fulltext, @@ -397,6 +387,8 @@ public Task search( RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), ACTION_SEARCH, entityName), authorizer, auth, true) .withSearchFlags(flags -> searchFlags != null ? 
searchFlags : new SearchFlags().setFulltext(Boolean.TRUE.equals(fulltext))); + List sortCriterionList = getSortCriteria(sortCriteria, sortCriterion); + log.info("GET SEARCH RESULTS for {} with query {}", entityName, input); // TODO - change it to use _searchService once we are confident on it's latency return RestliUtil.toTask( @@ -405,7 +397,7 @@ public Task search( // This API is not used by the frontend for search bars so we default to structured result = entitySearchService.search(opContext, - List.of(entityName), input, filter, sortCriterion, start, count); + List.of(entityName), input, filter, sortCriterionList, start, count); if (!isAPIAuthorizedResult( auth, @@ -428,6 +420,7 @@ public Task searchAcrossEntities( @ActionParam(PARAM_INPUT) @Nonnull String input, @ActionParam(PARAM_FILTER) @Optional @Nullable Filter filter, @ActionParam(PARAM_SORT) @Optional @Nullable SortCriterion sortCriterion, + @ActionParam(PARAM_SORT_CRITERIA) @Optional @Nullable SortCriterion[] sortCriteria, @ActionParam(PARAM_START) int start, @ActionParam(PARAM_COUNT) int count, @ActionParam(PARAM_SEARCH_FLAGS) @Optional SearchFlags searchFlags) { @@ -447,10 +440,12 @@ public Task searchAcrossEntities( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to search."); } + List sortCriterionList = getSortCriteria(sortCriteria, sortCriterion); + log.info("GET SEARCH RESULTS ACROSS ENTITIES for {} with query {}", entityList, input); return RestliUtil.toTask( () -> { - SearchResult result = searchService.searchAcrossEntities(opContext, entityList, input, filter, sortCriterion, start, count); + SearchResult result = searchService.searchAcrossEntities(opContext, entityList, input, filter, sortCriterionList, start, count); if (!isAPIAuthorizedResult( auth, authorizer, @@ -463,6 +458,18 @@ public Task searchAcrossEntities( }); } + private List getSortCriteria(@Nullable SortCriterion[] sortCriteria, @Nullable SortCriterion sortCriterion) { + List sortCriterionList; + if (sortCriteria != null) { + 
sortCriterionList = Arrays.asList(sortCriteria); + } else if (sortCriterion != null) { + sortCriterionList = Collections.singletonList(sortCriterion); + } else { + sortCriterionList = Collections.emptyList(); + } + return sortCriterionList; + } + @Action(name = ACTION_SCROLL_ACROSS_ENTITIES) @Nonnull @WithSpan @@ -471,6 +478,7 @@ public Task scrollAcrossEntities( @ActionParam(PARAM_INPUT) @Nonnull String input, @ActionParam(PARAM_FILTER) @Optional @Nullable Filter filter, @ActionParam(PARAM_SORT) @Optional @Nullable SortCriterion sortCriterion, + @ActionParam(PARAM_SORT_CRITERIA) @Optional @Nullable SortCriterion[] sortCriteria, @ActionParam(PARAM_SCROLL_ID) @Optional @Nullable String scrollId, @ActionParam(PARAM_KEEP_ALIVE) String keepAlive, @ActionParam(PARAM_COUNT) int count, @@ -490,6 +498,8 @@ public Task scrollAcrossEntities( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to search."); } + List sortCriterionList = getSortCriteria(sortCriteria, sortCriterion); + log.info( "GET SCROLL RESULTS ACROSS ENTITIES for {} with query {} and scroll ID: {}", entityList, @@ -503,7 +513,7 @@ public Task scrollAcrossEntities( entityList, input, filter, - sortCriterion, + sortCriterionList, scrollId, keepAlive, count); @@ -531,6 +541,7 @@ public Task searchAcrossLineage( @ActionParam(PARAM_MAX_HOPS) @Optional @Nullable Integer maxHops, @ActionParam(PARAM_FILTER) @Optional @Nullable Filter filter, @ActionParam(PARAM_SORT) @Optional @Nullable SortCriterion sortCriterion, + @ActionParam(PARAM_SORT_CRITERIA) @Optional @Nullable SortCriterion[] sortCriteria, @ActionParam(PARAM_START) int start, @ActionParam(PARAM_COUNT) int count, @ActionParam(PARAM_START_TIME_MILLIS) @Optional @Nullable Long startTimeMillis, @@ -546,6 +557,8 @@ public Task searchAcrossLineage( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to search."); } + List sortCriterionList = getSortCriteria(sortCriteria, sortCriterion); + OperationContext opContext = OperationContext.asSession( 
systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), ACTION_SEARCH_ACROSS_LINEAGE, entities), authorizer, auth, true) .withSearchFlags(flags -> (searchFlags != null ? searchFlags : new SearchFlags().setFulltext(true)) @@ -570,7 +583,7 @@ public Task searchAcrossLineage( input, maxHops, filter, - sortCriterion, + sortCriterionList, start, count), entityService), @@ -588,6 +601,7 @@ public Task scrollAcrossLineage( @ActionParam(PARAM_MAX_HOPS) @Optional @Nullable Integer maxHops, @ActionParam(PARAM_FILTER) @Optional @Nullable Filter filter, @ActionParam(PARAM_SORT) @Optional @Nullable SortCriterion sortCriterion, + @ActionParam(PARAM_SORT_CRITERIA) @Optional @Nullable SortCriterion[] sortCriteria, @ActionParam(PARAM_SCROLL_ID) @Optional @Nullable String scrollId, @ActionParam(PARAM_KEEP_ALIVE) String keepAlive, @ActionParam(PARAM_COUNT) int count, @@ -622,6 +636,8 @@ public Task scrollAcrossLineage( entityList, input); + List sortCriterionList = getSortCriteria(sortCriteria, sortCriterion); + return RestliUtil.toTask( () -> validateLineageScrollResult(opContext, @@ -633,7 +649,7 @@ public Task scrollAcrossLineage( input, maxHops, filter, - sortCriterion, + sortCriterionList, scrollId, keepAlive, count), @@ -648,6 +664,7 @@ public Task list( @ActionParam(PARAM_ENTITY) @Nonnull String entityName, @ActionParam(PARAM_FILTER) @Optional @Nullable Filter filter, @ActionParam(PARAM_SORT) @Optional @Nullable SortCriterion sortCriterion, + @ActionParam(PARAM_SORT_CRITERIA) @Optional @Nullable SortCriterion[] sortCriteria, @ActionParam(PARAM_START) int start, @ActionParam(PARAM_COUNT) int count) { @@ -664,10 +681,12 @@ public Task list( systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), ACTION_LIST, entityName), authorizer, auth, true) .withSearchFlags(flags -> new SearchFlags().setFulltext(false)); + List sortCriterionList = getSortCriteria(sortCriteria, sortCriterion); + 
log.info("GET LIST RESULTS for {} with filter {}", entityName, filter); return RestliUtil.toTask( () -> { - SearchResult result = entitySearchService.filter(opContext, entityName, filter, sortCriterion, start, count); + SearchResult result = entitySearchService.filter(opContext, entityName, filter, sortCriterionList, start, count); if (!AuthUtil.isAPIAuthorizedResult( auth, authorizer, @@ -1159,6 +1178,7 @@ public Task filter( @ActionParam(PARAM_ENTITY) @Nonnull String entityName, @ActionParam(PARAM_FILTER) Filter filter, @ActionParam(PARAM_SORT) @Optional @Nullable SortCriterion sortCriterion, + @ActionParam(PARAM_SORT_CRITERIA) @Optional @Nullable SortCriterion[] sortCriteria, @ActionParam(PARAM_START) int start, @ActionParam(PARAM_COUNT) int count) { @@ -1172,10 +1192,12 @@ public Task filter( } OperationContext opContext = OperationContext.asSession( systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), ACTION_FILTER, entityName), authorizer, auth, true); + + List sortCriterionList = getSortCriteria(sortCriteria, sortCriterion); log.info("FILTER RESULTS for {} with filter {}", entityName, filter); return RestliUtil.toTask( () -> { - SearchResult result = entitySearchService.filter(opContext.withSearchFlags(flags -> flags.setFulltext(true)), entityName, filter, sortCriterion, start, count); + SearchResult result = entitySearchService.filter(opContext.withSearchFlags(flags -> flags.setFulltext(true)), entityName, filter, sortCriterionList, start, count); if (!isAPIAuthorizedResult( auth, authorizer, diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliConstants.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliConstants.java index af6efb1ad80939..ef79a404c2145e 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliConstants.java +++ 
b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliConstants.java @@ -24,6 +24,7 @@ private RestliConstants() {} public static final String PARAM_FILTER = "filter"; public static final String PARAM_GROUP = "group"; public static final String PARAM_SORT = "sort"; + public static final String PARAM_SORT_CRITERIA = "sortCriteria"; public static final String PARAM_QUERY = "query"; public static final String PARAM_FIELD = "field"; public static final String PARAM_PATH = "path"; diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java index 518dfecd576808..1b003fec82e8b8 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java @@ -2,25 +2,20 @@ import static com.datahub.authorization.AuthUtil.isAPIAuthorized; import static com.datahub.authorization.AuthUtil.isAPIAuthorizedEntityUrns; -import static com.linkedin.metadata.Constants.*; import static com.linkedin.metadata.authorization.ApiOperation.UPDATE; import static com.linkedin.metadata.timeseries.elastic.UsageServiceUtil.USAGE_STATS_ASPECT_NAME; import static com.linkedin.metadata.timeseries.elastic.UsageServiceUtil.USAGE_STATS_ENTITY_NAME; import com.codahale.metrics.MetricRegistry; -import com.codahale.metrics.Timer; import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; import com.datahub.authorization.EntitySpec; import com.datahub.plugins.auth.authorization.Authorizer; import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.core.StreamReadConstraints; import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; 
import com.linkedin.common.WindowDuration; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; -import com.linkedin.data.template.StringArray; import com.linkedin.dataset.DatasetFieldUsageCounts; import com.linkedin.dataset.DatasetFieldUsageCountsArray; import com.linkedin.dataset.DatasetUsageStatistics; @@ -29,17 +24,10 @@ import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.metadata.query.filter.Condition; -import com.linkedin.metadata.query.filter.ConjunctiveCriterion; -import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; -import com.linkedin.metadata.query.filter.CriterionArray; -import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.restli.RestliUtil; import com.linkedin.metadata.timeseries.TimeseriesAspectService; import com.linkedin.metadata.timeseries.elastic.UsageServiceUtil; import com.linkedin.metadata.timeseries.transformer.TimeseriesAspectTransformer; -import com.linkedin.metadata.utils.metrics.MetricUtils; import com.linkedin.parseq.Task; import com.linkedin.restli.common.HttpStatus; import com.linkedin.restli.server.RestLiServiceException; @@ -47,35 +35,20 @@ import com.linkedin.restli.server.annotations.ActionParam; import com.linkedin.restli.server.annotations.RestLiSimpleResource; import com.linkedin.restli.server.resources.SimpleResourceTemplate; -import com.linkedin.timeseries.AggregationSpec; -import com.linkedin.timeseries.AggregationType; -import com.linkedin.timeseries.CalendarInterval; -import com.linkedin.timeseries.GenericTable; -import com.linkedin.timeseries.GroupingBucket; -import com.linkedin.timeseries.GroupingBucketType; import com.linkedin.timeseries.TimeWindowSize; import com.linkedin.usage.FieldUsageCounts; -import com.linkedin.usage.FieldUsageCountsArray; import 
com.linkedin.usage.UsageAggregation; -import com.linkedin.usage.UsageAggregationArray; import com.linkedin.usage.UsageAggregationMetrics; import com.linkedin.usage.UsageQueryResult; -import com.linkedin.usage.UsageQueryResultAggregations; import com.linkedin.usage.UsageTimeRange; import com.linkedin.usage.UserUsageCounts; -import com.linkedin.usage.UserUsageCountsArray; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.RequestContext; import io.opentelemetry.extension.annotations.WithSpan; -import java.net.URISyntaxException; -import java.time.Instant; -import java.util.ArrayList; import java.util.Arrays; -import java.util.List; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; -import java.util.concurrent.TimeUnit; import javax.annotation.Nonnull; import javax.inject.Inject; import javax.inject.Named; @@ -255,7 +228,8 @@ private void ingest(@Nonnull OperationContext opContext, @Nonnull UsageAggregati try { documents = TimeseriesAspectTransformer.transform( - bucket.getResource(), datasetUsageStatistics, getUsageStatsAspectSpec(), null); + bucket.getResource(), datasetUsageStatistics, getUsageStatsAspectSpec(), null, + systemOperationContext.getSearchContext().getIndexConvention().getIdHashAlgo()); } catch (JsonProcessingException e) { log.error("Failed to generate timeseries document from aspect: {}", e.toString()); return; diff --git a/metadata-service/restli-servlet-impl/src/test/java/mock/MockTimeseriesAspectService.java b/metadata-service/restli-servlet-impl/src/test/java/mock/MockTimeseriesAspectService.java index aaf90d279e0bda..7ed183e975f3b9 100644 --- a/metadata-service/restli-servlet-impl/src/test/java/mock/MockTimeseriesAspectService.java +++ b/metadata-service/restli-servlet-impl/src/test/java/mock/MockTimeseriesAspectService.java @@ -137,7 +137,7 @@ public TimeseriesScrollResult scrollAspects( @Nonnull String entityName, @Nonnull String aspectName, @Nullable Filter 
filter, - @Nonnull List sortCriterion, + @Nonnull List sortCriteria, @Nullable String scrollId, int count, @Nullable Long startTimeMillis, diff --git a/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/SchemaRegistryController.java b/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/SchemaRegistryController.java index d73b353f38ae78..09043c6dd5e87e 100644 --- a/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/SchemaRegistryController.java +++ b/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/SchemaRegistryController.java @@ -307,7 +307,11 @@ public ResponseEntity register( }) .orElseGet( () -> { - log.error("Couldn't find topic with name {}.", topicName); + if (topicName.matches("^[a-zA-Z0-9._-]+$")) { + log.error("Couldn't find topic with name {}.", topicName); + } else { + log.error("Couldn't find topic (Malformed topic name)"); + } return new ResponseEntity<>(HttpStatus.NOT_FOUND); }); } diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java index aed9b97411ff68..ed14dec4ed940a 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java @@ -729,11 +729,11 @@ private MetadataChangeProposal updateFormsAspect( .collect(Collectors.toList()); List completedForms = formsAspect.getCompletedForms().stream() - .filter(completedForm -> completedForm.getUrn() != deletedUrn) + .filter(completedForm -> !completedForm.getUrn().equals(deletedUrn)) .collect(Collectors.toList()); final List verifications = formsAspect.getVerifications().stream() - .filter(verification -> verification.getForm() != 
deletedUrn) + .filter(verification -> !verification.getForm().equals(deletedUrn)) .collect(Collectors.toList()); updatedAspect.get().setIncompleteForms(new FormAssociationArray(incompleteForms)); diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java index a3db4b029b68bb..1b32ddc7c3ecbc 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java @@ -80,7 +80,7 @@ void appendRunId( * @param input the search input text * @param postFilters the request map with fields and values as filters to be applied to search * hits - * @param sortCriterion {@link SortCriterion} to be applied to search results + * @param sortCriteria list of {@link SortCriterion} to be applied to search results * @param from index to start the search from * @param size the number of search hits to return * @return a {@link SearchResult} that contains a list of matched documents and related search @@ -92,7 +92,7 @@ SearchResult search( @Nonnull List entityNames, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, + List sortCriteria, int from, int size); @@ -108,7 +108,7 @@ SearchResult search( * @param input the search input text * @param postFilters the request map with fields and values as filters to be applied to search * hits - * @param sortCriterion {@link SortCriterion} to be applied to search results + * @param sortCriteria list of {@link SortCriterion} to be applied to search results * @param from index to start the search from * @param size the number of search hits to return * @param facets list of facets we want aggregations for @@ -121,7 +121,7 @@ SearchResult search( @Nonnull List entityNames, @Nonnull String input, @Nullable Filter postFilters, - 
@Nullable SortCriterion sortCriterion, + List sortCriteria, int from, int size, @Nullable List facets); @@ -132,7 +132,7 @@ SearchResult search( * @param entityName name of the entity * @param filters the request map with fields and values to be applied as filters to the search * query - * @param sortCriterion {@link SortCriterion} to be applied to search results + * @param sortCriteria list of {@link SortCriterion} to be applied to search results * @param from index to start the search from * @param size number of search hits to return * @return a {@link SearchResult} that contains a list of filtered documents and related search @@ -143,7 +143,7 @@ SearchResult filter( @Nonnull OperationContext opContext, @Nonnull String entityName, @Nullable Filter filters, - @Nullable SortCriterion sortCriterion, + List sortCriteria, int from, int size); @@ -265,7 +265,7 @@ List getBrowsePaths( * @param input the search input text * @param postFilters the request map with fields and values as filters to be applied to search * hits - * @param sortCriterion {@link SortCriterion} to be applied to search results + * @param sortCriteria list of {@link SortCriterion} to be applied to search results * @param scrollId opaque scroll identifier to pass to search service * @param size the number of search hits to return * @return a {@link ScrollResult} that contains a list of matched documents and related search @@ -277,7 +277,7 @@ ScrollResult fullTextScroll( @Nonnull List entities, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, + List sortCriteria, @Nullable String scrollId, @Nullable String keepAlive, int size); @@ -290,7 +290,7 @@ ScrollResult fullTextScroll( * @param input the search input text * @param postFilters the request map with fields and values as filters to be applied to search * hits - * @param sortCriterion {@link SortCriterion} to be applied to search results + * @param sortCriteria list of {@link SortCriterion} to be applied 
to search results * @param scrollId opaque scroll identifier to pass to search service * @param size the number of search hits to return * @return a {@link ScrollResult} that contains a list of matched documents and related search @@ -302,7 +302,7 @@ ScrollResult structuredScroll( @Nonnull List entities, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, + List sortCriteria, @Nullable String scrollId, @Nullable String keepAlive, int size); @@ -316,7 +316,7 @@ ExplainResponse explain( @Nonnull String documentId, @Nonnull String entityName, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, + List sortCriteria, @Nullable String scrollId, @Nullable String keepAlive, int size, diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/timeseries/TimeseriesAspectService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/timeseries/TimeseriesAspectService.java index 6b1f484ac0a518..68c82f0ef2e0da 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/timeseries/TimeseriesAspectService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/timeseries/TimeseriesAspectService.java @@ -226,7 +226,7 @@ TimeseriesScrollResult scrollAspects( @Nonnull final String entityName, @Nonnull final String aspectName, @Nullable Filter filter, - @Nonnull List sortCriterion, + @Nonnull List sortCriteria, @Nullable String scrollId, int count, @Nullable Long startTimeMillis, diff --git a/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java b/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java index bc623c3cc983c2..e47a2b4e278e4e 100644 --- a/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java +++ b/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java @@ -37,7 +37,10 @@ "com.linkedin.gms.factory.search", "com.linkedin.gms.factory.secret", 
"com.linkedin.gms.factory.timeseries", - "com.linkedin.gms.factory.plugins" + "com.linkedin.gms.factory.plugins", + "com.linkedin.gms.factory.change", + "com.datahub.event.hook", + "com.linkedin.gms.factory.notifications" }) @PropertySource(value = "classpath:/application.yaml", factory = YamlPropertySourceFactory.class) @Configuration diff --git a/metadata-service/war/src/main/resources/boot/global_settings.json b/metadata-service/war/src/main/resources/boot/global_settings.json index 129783afd6df49..35145b85202a7b 100644 --- a/metadata-service/war/src/main/resources/boot/global_settings.json +++ b/metadata-service/war/src/main/resources/boot/global_settings.json @@ -1,4 +1,8 @@ { "views": { + }, + "docPropagation": { + "enabled": true, + "columnPropagationEnabled": true } } \ No newline at end of file diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/utils/SystemMetadataUtils.java b/metadata-utils/src/main/java/com/linkedin/metadata/utils/SystemMetadataUtils.java index 81bfcaab74ddb8..4cc09f83e5f740 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/utils/SystemMetadataUtils.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/utils/SystemMetadataUtils.java @@ -1,6 +1,8 @@ package com.linkedin.metadata.utils; -import com.linkedin.metadata.Constants; +import static com.linkedin.metadata.Constants.DEFAULT_RUN_ID; + +import com.linkedin.data.template.SetMode; import com.linkedin.mxe.SystemMetadata; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; @@ -11,13 +13,25 @@ public class SystemMetadataUtils { private SystemMetadataUtils() {} public static SystemMetadata createDefaultSystemMetadata() { - return new SystemMetadata() - .setRunId(Constants.DEFAULT_RUN_ID) - .setLastObserved(System.currentTimeMillis()); + return generateSystemMetadataIfEmpty(null); + } + + public static SystemMetadata createDefaultSystemMetadata(@Nullable String runId) { + return generateSystemMetadataIfEmpty( + new SystemMetadata() + 
.setRunId(runId, SetMode.REMOVE_IF_NULL) + .setLastObserved(System.currentTimeMillis())); } public static SystemMetadata generateSystemMetadataIfEmpty( @Nullable SystemMetadata systemMetadata) { - return systemMetadata == null ? createDefaultSystemMetadata() : systemMetadata; + SystemMetadata result = systemMetadata == null ? new SystemMetadata() : systemMetadata; + if (result.getRunId() == null) { + result.setRunId(DEFAULT_RUN_ID); + } + if (!result.hasLastObserved() || result.getLastObserved() == 0) { + result.setLastObserved(System.currentTimeMillis()); + } + return result; } } diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/utils/elasticsearch/IndexConvention.java b/metadata-utils/src/main/java/com/linkedin/metadata/utils/elasticsearch/IndexConvention.java index 4a3f78fcef7bd6..87aebabf643666 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/utils/elasticsearch/IndexConvention.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/utils/elasticsearch/IndexConvention.java @@ -47,4 +47,7 @@ public interface IndexConvention { * if one cannot be extracted */ Optional> getEntityAndAspectName(String timeseriesAspectIndexName); + + @Nonnull + String getIdHashAlgo(); } diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/utils/elasticsearch/IndexConventionImpl.java b/metadata-utils/src/main/java/com/linkedin/metadata/utils/elasticsearch/IndexConventionImpl.java index 47801cd2054fa4..2c9c927cd8c347 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/utils/elasticsearch/IndexConventionImpl.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/utils/elasticsearch/IndexConventionImpl.java @@ -8,25 +8,30 @@ import java.util.concurrent.ConcurrentHashMap; import javax.annotation.Nonnull; import javax.annotation.Nullable; +import lombok.Getter; import org.apache.commons.lang3.StringUtils; // Default implementation of search index naming convention public class IndexConventionImpl implements IndexConvention { - 
public static final IndexConvention NO_PREFIX = new IndexConventionImpl(null); + public static IndexConvention noPrefix(@Nonnull String idHashAlgo) { + return new IndexConventionImpl(null, idHashAlgo); + } // Map from Entity name -> Index name private final Map indexNameMapping = new ConcurrentHashMap<>(); private final Optional _prefix; private final String _getAllEntityIndicesPattern; private final String _getAllTimeseriesIndicesPattern; + @Getter private final String idHashAlgo; private static final String ENTITY_INDEX_VERSION = "v2"; private static final String ENTITY_INDEX_SUFFIX = "index"; private static final String TIMESERIES_INDEX_VERSION = "v1"; private static final String TIMESERIES_ENTITY_INDEX_SUFFIX = "aspect"; - public IndexConventionImpl(@Nullable String prefix) { + public IndexConventionImpl(@Nullable String prefix, String idHashAlgo) { _prefix = StringUtils.isEmpty(prefix) ? Optional.empty() : Optional.of(prefix); + this.idHashAlgo = idHashAlgo; _getAllEntityIndicesPattern = _prefix.map(p -> p + "_").orElse("") + "*" diff --git a/metadata-utils/src/test/java/com/linkedin/metadata/utils/elasticsearch/IndexConventionImplTest.java b/metadata-utils/src/test/java/com/linkedin/metadata/utils/elasticsearch/IndexConventionImplTest.java index 8074f344cd2441..2f6c7138d3c4fb 100644 --- a/metadata-utils/src/test/java/com/linkedin/metadata/utils/elasticsearch/IndexConventionImplTest.java +++ b/metadata-utils/src/test/java/com/linkedin/metadata/utils/elasticsearch/IndexConventionImplTest.java @@ -10,7 +10,7 @@ public class IndexConventionImplTest { @Test public void testIndexConventionNoPrefix() { - IndexConvention indexConventionNoPrefix = IndexConventionImpl.NO_PREFIX; + IndexConvention indexConventionNoPrefix = IndexConventionImpl.noPrefix("MD5"); String entityName = "dataset"; String expectedIndexName = "datasetindex_v2"; assertEquals(indexConventionNoPrefix.getEntityIndexName(entityName), expectedIndexName); @@ -25,7 +25,7 @@ public void 
testIndexConventionNoPrefix() { @Test public void testIndexConventionPrefix() { - IndexConvention indexConventionPrefix = new IndexConventionImpl("prefix"); + IndexConvention indexConventionPrefix = new IndexConventionImpl("prefix", "MD5"); String entityName = "dataset"; String expectedIndexName = "prefix_datasetindex_v2"; assertEquals(indexConventionPrefix.getEntityIndexName(entityName), expectedIndexName); @@ -42,7 +42,7 @@ public void testIndexConventionPrefix() { @Test public void testTimeseriesIndexConventionNoPrefix() { - IndexConvention indexConventionNoPrefix = IndexConventionImpl.NO_PREFIX; + IndexConvention indexConventionNoPrefix = IndexConventionImpl.noPrefix("MD5"); String entityName = "dataset"; String aspectName = "datasetusagestatistics"; String expectedIndexName = "dataset_datasetusagestatisticsaspect_v1"; @@ -64,7 +64,7 @@ public void testTimeseriesIndexConventionNoPrefix() { @Test public void testTimeseriesIndexConventionPrefix() { - IndexConvention indexConventionPrefix = new IndexConventionImpl("prefix"); + IndexConvention indexConventionPrefix = new IndexConventionImpl("prefix", "MD5"); String entityName = "dataset"; String aspectName = "datasetusagestatistics"; String expectedIndexName = "prefix_dataset_datasetusagestatisticsaspect_v1"; diff --git a/smoke-test/build.gradle b/smoke-test/build.gradle index 9800cf65fc4529..a9e5a8942b71ec 100644 --- a/smoke-test/build.gradle +++ b/smoke-test/build.gradle @@ -44,12 +44,19 @@ task yarnInstall(type: YarnTask) { environment = ['NODE_OPTIONS': '--openssl-legacy-provider'] args = ['install', '--cwd', "${project.rootDir}/smoke-test/tests/cypress"] } + task cypressLint(type: YarnTask, dependsOn: yarnInstall) { environment = ['NODE_OPTIONS': '--openssl-legacy-provider'] // TODO: Run a full lint instead of just format. 
args = ['--cwd', "${project.rootDir}/smoke-test/tests/cypress", 'run', 'format'] } +task cypressLintFix(type: YarnTask, dependsOn: yarnInstall) { + environment = ['NODE_OPTIONS': '--openssl-legacy-provider'] + // TODO: Run a full lint instead of just format. + args = ['--cwd', "${project.rootDir}/smoke-test/tests/cypress", 'run', 'format', '--write'] +} + task installDev(type: Exec) { inputs.file file('pyproject.toml') inputs.file file('requirements.txt') @@ -86,8 +93,6 @@ task pythonLintFix(type: Exec, dependsOn: installDev) { */ task noCypressSuite0(type: Exec, dependsOn: [installDev, ':metadata-ingestion:installDev']) { environment 'RUN_QUICKSTART', 'false' - environment 'DATAHUB_KAFKA_SCHEMA_REGISTRY_URL', 'http://localhost:8080/schema-registry/api/' - environment 'KAFKA_BROKER_CONTAINER', 'datahub-kafka-broker-1' environment 'TEST_STRATEGY', 'no_cypress_suite0' workingDir = project.projectDir @@ -98,8 +103,6 @@ task noCypressSuite0(type: Exec, dependsOn: [installDev, ':metadata-ingestion:in task noCypressSuite1(type: Exec, dependsOn: [installDev, ':metadata-ingestion:installDev']) { environment 'RUN_QUICKSTART', 'false' - environment 'DATAHUB_KAFKA_SCHEMA_REGISTRY_URL', 'http://localhost:8080/schema-registry/api/' - environment 'KAFKA_BROKER_CONTAINER', 'datahub-kafka-broker-1' environment 'TEST_STRATEGY', 'no_cypress_suite1' workingDir = project.projectDir @@ -110,8 +113,6 @@ task noCypressSuite1(type: Exec, dependsOn: [installDev, ':metadata-ingestion:in task cypressSuite1(type: Exec, dependsOn: [installDev, ':metadata-ingestion:installDev']) { environment 'RUN_QUICKSTART', 'false' - environment 'DATAHUB_KAFKA_SCHEMA_REGISTRY_URL', 'http://localhost:8080/schema-registry/api/' - environment 'KAFKA_BROKER_CONTAINER', 'datahub-kafka-broker-1' environment 'TEST_STRATEGY', 'cypress_suite1' workingDir = project.projectDir @@ -122,8 +123,6 @@ task cypressSuite1(type: Exec, dependsOn: [installDev, ':metadata-ingestion:inst task cypressRest(type: Exec, dependsOn: 
[installDev, ':metadata-ingestion:installDev']) { environment 'RUN_QUICKSTART', 'false' - environment 'DATAHUB_KAFKA_SCHEMA_REGISTRY_URL', 'http://localhost:8080/schema-registry/api/' - environment 'KAFKA_BROKER_CONTAINER', 'datahub-kafka-broker-1' environment 'TEST_STRATEGY', 'cypress_rest' workingDir = project.projectDir @@ -137,8 +136,6 @@ task cypressRest(type: Exec, dependsOn: [installDev, ':metadata-ingestion:instal */ task cypressDev(type: Exec, dependsOn: [installDev, ':metadata-ingestion:installDev']) { environment 'RUN_QUICKSTART', 'false' - environment 'DATAHUB_KAFKA_SCHEMA_REGISTRY_URL', 'http://localhost:8080/schema-registry/api/' - environment 'KAFKA_BROKER_CONTAINER', 'datahub-kafka-broker-1' workingDir = project.projectDir commandLine 'bash', '-c', @@ -151,12 +148,18 @@ task cypressDev(type: Exec, dependsOn: [installDev, ':metadata-ingestion:install */ task cypressData(type: Exec, dependsOn: [installDev, ':metadata-ingestion:installDev']) { environment 'RUN_QUICKSTART', 'false' - environment 'DATAHUB_KAFKA_SCHEMA_REGISTRY_URL', 'http://localhost:8080/schema-registry/api/' - environment 'KAFKA_BROKER_CONTAINER', 'datahub-kafka-broker-1' environment 'RUN_UI', 'false' workingDir = project.projectDir commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + "./cypress-dev.sh" -} \ No newline at end of file +} + +task lint { + dependsOn pythonLint, cypressLint +} + +task lintFix { + dependsOn pythonLintFix +} diff --git a/smoke-test/cypress-dev.sh b/smoke-test/cypress-dev.sh index 2b31c574d05787..bce2d794b18691 100755 --- a/smoke-test/cypress-dev.sh +++ b/smoke-test/cypress-dev.sh @@ -10,8 +10,9 @@ fi source venv/bin/activate -export KAFKA_BROKER_CONTAINER="datahub-kafka-broker-1" -export KAFKA_BOOTSTRAP_SERVER="broker:9092" +# set environment variables for the test +source ./set-test-env-vars.sh + python -c 'from tests.cypress.integration_test import ingest_data; ingest_data()' cd tests/cypress diff --git a/smoke-test/requirements.txt 
b/smoke-test/requirements.txt index 861c69f354fe5b..952e8ed355d052 100644 --- a/smoke-test/requirements.txt +++ b/smoke-test/requirements.txt @@ -18,3 +18,4 @@ types-requests>=2.28.11.6,<=2.31.0.3 types-PyYAML # https://github.com/docker/docker-py/issues/3256 requests<=2.31.0 +deepdiff \ No newline at end of file diff --git a/smoke-test/run-quickstart.sh b/smoke-test/run-quickstart.sh index 05c321566d54a6..eb0d46b3172442 100755 --- a/smoke-test/run-quickstart.sh +++ b/smoke-test/run-quickstart.sh @@ -10,16 +10,17 @@ source venv/bin/activate mkdir -p ~/.datahub/plugins/frontend/auth/ echo "test_user:test_pass" >> ~/.datahub/plugins/frontend/auth/user.props +echo "DATAHUB_VERSION = $DATAHUB_VERSION" DATAHUB_SEARCH_IMAGE="${DATAHUB_SEARCH_IMAGE:=opensearchproject/opensearch}" DATAHUB_SEARCH_TAG="${DATAHUB_SEARCH_TAG:=2.9.0}" XPACK_SECURITY_ENABLED="${XPACK_SECURITY_ENABLED:=plugins.security.disabled=true}" ELASTICSEARCH_USE_SSL="${ELASTICSEARCH_USE_SSL:=false}" USE_AWS_ELASTICSEARCH="${USE_AWS_ELASTICSEARCH:=true}" -echo "DATAHUB_VERSION = $DATAHUB_VERSION" DATAHUB_TELEMETRY_ENABLED=false \ DOCKER_COMPOSE_BASE="file://$( dirname "$DIR" )" \ DATAHUB_SEARCH_IMAGE="$DATAHUB_SEARCH_IMAGE" DATAHUB_SEARCH_TAG="$DATAHUB_SEARCH_TAG" \ XPACK_SECURITY_ENABLED="$XPACK_SECURITY_ENABLED" ELASTICSEARCH_USE_SSL="$ELASTICSEARCH_USE_SSL" \ USE_AWS_ELASTICSEARCH="$USE_AWS_ELASTICSEARCH" \ -datahub docker quickstart --version ${DATAHUB_VERSION} --standalone_consumers --dump-logs-on-failure --kafka-setup +DATAHUB_VERSION=${DATAHUB_VERSION} \ +docker compose --project-directory ../docker/profiles --profile quickstart-consumers up -d --quiet-pull --wait --wait-timeout 900 diff --git a/smoke-test/set-cypress-creds.sh b/smoke-test/set-cypress-creds.sh index 82fe736b0a7e18..fc6e7dd42f5dea 100644 --- a/smoke-test/set-cypress-creds.sh +++ b/smoke-test/set-cypress-creds.sh @@ -2,4 +2,4 @@ export CYPRESS_ADMIN_USERNAME=${ADMIN_USERNAME:-datahub} export 
CYPRESS_ADMIN_PASSWORD=${ADMIN_PASSWORD:-datahub} -export CYPRESS_ADMIN_DISPLAYNAME=${ADMIN_DISPLAYNAME:-DataHub} \ No newline at end of file +export CYPRESS_ADMIN_DISPLAYNAME=${ADMIN_DISPLAYNAME:-DataHub} diff --git a/smoke-test/set-test-env-vars.sh b/smoke-test/set-test-env-vars.sh new file mode 100644 index 00000000000000..4668721f80de08 --- /dev/null +++ b/smoke-test/set-test-env-vars.sh @@ -0,0 +1,2 @@ +export DATAHUB_KAFKA_SCHEMA_REGISTRY_URL=http://localhost:8080/schema-registry/api +export DATAHUB_GMS_URL=http://localhost:8080 \ No newline at end of file diff --git a/smoke-test/smoke.sh b/smoke-test/smoke.sh index fafb2076fe6990..5b3e8a9377a6ca 100755 --- a/smoke-test/smoke.sh +++ b/smoke-test/smoke.sh @@ -16,15 +16,23 @@ cd "$DIR" if [ "${RUN_QUICKSTART:-true}" == "true" ]; then source ./run-quickstart.sh +else + mkdir -p ~/.datahub/plugins/frontend/auth/ + echo "test_user:test_pass" >> ~/.datahub/plugins/frontend/auth/user.props + echo "datahub:datahub" > ~/.datahub/plugins/frontend/auth/user.props + + python3 -m venv venv + source venv/bin/activate + python -m pip install --upgrade pip uv>=0.1.10 wheel setuptools + uv pip install -r requirements.txt fi -source venv/bin/activate - (cd ..; ./gradlew :smoke-test:yarnInstall) source ./set-cypress-creds.sh -export DATAHUB_GMS_URL=http://localhost:8080 +# set environment variables for the test +source ./set-test-env-vars.sh # no_cypress_suite0, no_cypress_suite1, cypress_suite1, cypress_rest if [[ -z "${TEST_STRATEGY}" ]]; then diff --git a/smoke-test/test_e2e.py b/smoke-test/test_e2e.py index abb4841314c4af..74d64a8193173a 100644 --- a/smoke-test/test_e2e.py +++ b/smoke-test/test_e2e.py @@ -21,6 +21,7 @@ get_frontend_session, get_admin_credentials, get_root_urn, + wait_for_writes_to_sync, ) bootstrap_sample_data = "../metadata-ingestion/examples/mce_files/bootstrap_mce.json" @@ -150,11 +151,13 @@ def _ensure_group_not_present(urn: str, frontend_session) -> Any: def 
test_ingestion_via_rest(wait_for_healthchecks): ingest_file_via_rest(bootstrap_sample_data) _ensure_user_present(urn=get_root_urn()) + wait_for_writes_to_sync() @pytest.mark.dependency(depends=["test_healthchecks"]) def test_ingestion_usage_via_rest(wait_for_healthchecks): ingest_file_via_rest(usage_sample_data) + wait_for_writes_to_sync() @pytest.mark.dependency(depends=["test_healthchecks"]) @@ -185,6 +188,7 @@ def test_ingestion_via_kafka(wait_for_healthchecks): # Since Kafka emission is asynchronous, we must wait a little bit so that # the changes are actually processed. time.sleep(kafka_post_ingestion_wait_sec) + wait_for_writes_to_sync() @pytest.mark.dependency( @@ -196,6 +200,7 @@ def test_ingestion_via_kafka(wait_for_healthchecks): ) def test_run_ingestion(wait_for_healthchecks): # Dummy test so that future ones can just depend on this one. + wait_for_writes_to_sync() pass @@ -1384,7 +1389,9 @@ def test_native_user_endpoints(frontend_session): unauthenticated_get_invite_token_response = unauthenticated_session.post( f"{get_frontend_url()}/api/v2/graphql", json=get_invite_token_json ) - assert unauthenticated_get_invite_token_response.status_code == HTTPStatus.UNAUTHORIZED + assert ( + unauthenticated_get_invite_token_response.status_code == HTTPStatus.UNAUTHORIZED + ) unauthenticated_create_reset_token_json = { "query": """mutation createNativeUserResetToken($input: CreateNativeUserResetTokenInput!) 
{\n @@ -1399,7 +1406,10 @@ def test_native_user_endpoints(frontend_session): f"{get_frontend_url()}/api/v2/graphql", json=unauthenticated_create_reset_token_json, ) - assert unauthenticated_create_reset_token_response.status_code == HTTPStatus.UNAUTHORIZED + assert ( + unauthenticated_create_reset_token_response.status_code + == HTTPStatus.UNAUTHORIZED + ) # cleanup steps json = { diff --git a/smoke-test/tests/consistency_utils.py b/smoke-test/tests/consistency_utils.py index 4335e2a874c1e7..1eddc46bb220b7 100644 --- a/smoke-test/tests/consistency_utils.py +++ b/smoke-test/tests/consistency_utils.py @@ -8,14 +8,34 @@ ELASTICSEARCH_REFRESH_INTERVAL_SECONDS: int = int( os.getenv("ELASTICSEARCH_REFRESH_INTERVAL_SECONDS", 5) ) -KAFKA_BROKER_CONTAINER: str = str( - os.getenv("KAFKA_BROKER_CONTAINER", "datahub-broker-1") -) KAFKA_BOOTSTRAP_SERVER: str = str(os.getenv("KAFKA_BOOTSTRAP_SERVER", "broker:29092")) logger = logging.getLogger(__name__) +def infer_kafka_broker_container() -> str: + """Return the first name listed by `docker ps` that contains 'broker'; raise ValueError if none.""" + cmd = "docker ps --format '{{.Names}}' | grep broker" + completed_process = subprocess.run( + cmd, + capture_output=True, + shell=True, + text=True, + ) + result = str(completed_process.stdout) + lines = result.splitlines() + if len(lines) == 0: + raise ValueError("No Kafka broker containers found") + return lines[0] + + +# Only shell out to docker when KAFKA_BROKER_CONTAINER is unset or empty: as a +# getenv() default the lookup ran (and could raise) even when the override was set. +KAFKA_BROKER_CONTAINER: str = str( + os.getenv("KAFKA_BROKER_CONTAINER") or infer_kafka_broker_container() +) + + def wait_for_writes_to_sync(max_timeout_in_sec: int = 120) -> None: if USE_STATIC_SLEEP: time.sleep(ELASTICSEARCH_REFRESH_INTERVAL_SECONDS) @@ -44,7 +64,9 @@ def wait_for_writes_to_sync(max_timeout_in_sec: int = 120) -> None: if maximum_lag == 0: lag_zero = True except ValueError: - logger.warning(f"Error reading kafka lag using command: {cmd}") + logger.warning( + f"Error reading kafka lag using command: {cmd}", exc_info=True + ) if not lag_zero: logger.warning( diff --git a/smoke-test/tests/cypress/cypress/e2e/actions/docPropagation.js
b/smoke-test/tests/cypress/cypress/e2e/actions/docPropagation.js new file mode 100644 index 00000000000000..3d7e14195ab64f --- /dev/null +++ b/smoke-test/tests/cypress/cypress/e2e/actions/docPropagation.js @@ -0,0 +1,20 @@ +const testId = '[data-testid="docPropagationIndicator"]'; + +describe("docPropagation", () => { + it("logs in and navigates to the schema page and checks for docPropagationIndicator", () => { + cy.login(); + cy.visit( + "/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_deleted,PROD)/Schema?is_lineage_mode=false&schemaFilter=", + ); + + // verify that the indicator exists in the table + cy.get(testId).should("exist"); + + // click on the table row + cy.get('[data-row-key="user_id"]').click(); + + // verify that the indicator exists in id="entity-profile-sidebar" + // find() scopes the lookup to the sidebar and retries until the assertion passes + cy.get('[id="entity-profile-sidebar"]').find(testId).should("exist"); + }); +}); diff --git a/smoke-test/tests/cypress/data.json b/smoke-test/tests/cypress/data.json index 5253b7a33b085f..ce61f7c83a0389 100644 --- a/smoke-test/tests/cypress/data.json +++ b/smoke-test/tests/cypress/data.json @@ -96,7 +96,11 @@ }, "nativeDataType": "varchar(100)", "globalTags": { - "tags": [{ "tag": "urn:li:tag:NeedsDocumentation" }] + "tags": [ + { + "tag": "urn:li:tag:NeedsDocumentation" + } + ] }, "recursive": false }, @@ -137,7 +141,11 @@ }, { "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [{ "tag": "urn:li:tag:Cypress" }] + "tags": [ + { + "tag": "urn:li:tag:Cypress" + } + ] } } ] @@ -246,7 +254,13 @@ "editableSchemaFieldInfo": [ { "fieldPath": "shipment_info", - "globalTags": { "tags": [{ "tag": "urn:li:tag:Legacy" }] }, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Legacy" + } + ] + }, "glossaryTerms":
{ "terms": [ { @@ -401,8 +415,12 @@ { "com.linkedin.pegasus2avro.common.GlobalTags": { "tags": [ - { "tag": "urn:li:tag:Cypress" }, - { "tag": "urn:li:tag:Cypress2" } + { + "tag": "urn:li:tag:Cypress" + }, + { + "tag": "urn:li:tag:Cypress2" + } ] } } @@ -542,7 +560,11 @@ }, { "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [{ "tag": "urn:li:tag:Cypress" }] + "tags": [ + { + "tag": "urn:li:tag:Cypress" + } + ] } } ] @@ -718,7 +740,11 @@ }, { "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [{ "tag": "urn:li:tag:Cypress" }] + "tags": [ + { + "tag": "urn:li:tag:Cypress" + } + ] } } ] @@ -1011,7 +1037,11 @@ }, { "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [{ "tag": "urn:li:tag:Cypress" }] + "tags": [ + { + "tag": "urn:li:tag:Cypress" + } + ] } } ] @@ -1229,7 +1259,11 @@ }, { "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [{ "tag": "urn:li:tag:Cypress" }] + "tags": [ + { + "tag": "urn:li:tag:Cypress" + } + ] } } ] @@ -1279,7 +1313,11 @@ }, { "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [{ "tag": "urn:li:tag:Cypress" }] + "tags": [ + { + "tag": "urn:li:tag:Cypress" + } + ] } } ] @@ -1332,7 +1370,11 @@ }, { "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [{ "tag": "urn:li:tag:Cypress" }] + "tags": [ + { + "tag": "urn:li:tag:Cypress" + } + ] } } ] @@ -1371,7 +1413,11 @@ }, { "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [{ "tag": "urn:li:tag:Cypress" }] + "tags": [ + { + "tag": "urn:li:tag:Cypress" + } + ] } } ] @@ -1413,7 +1459,11 @@ }, { "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [{ "tag": "urn:li:tag:Cypress" }] + "tags": [ + { + "tag": "urn:li:tag:Cypress" + } + ] } } ] @@ -1459,7 +1509,11 @@ }, { "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [{ "tag": "urn:li:tag:Cypress" }] + "tags": [ + { + "tag": "urn:li:tag:Cypress" + } + ] } } ] @@ -1521,7 +1575,11 @@ }, { "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [{ "tag": "urn:li:tag:Cypress" }] + 
"tags": [ + { + "tag": "urn:li:tag:Cypress" + } + ] } } ] @@ -1758,7 +1816,11 @@ }, { "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [{ "tag": "urn:li:tag:CypressFeatureTag" }] + "tags": [ + { + "tag": "urn:li:tag:CypressFeatureTag" + } + ] } } ] @@ -1785,7 +1847,11 @@ }, { "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [{ "tag": "urn:li:tag:CypressPrimaryKeyTag" }] + "tags": [ + { + "tag": "urn:li:tag:CypressPrimaryKeyTag" + } + ] } } ] @@ -2137,5 +2203,17 @@ "contentType": "application/json" }, "systemMetadata": null + }, + { + "auditHeader": null, + "entityType": "schemaField", + "entityUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_deleted,PROD),user_id)", + "changeType": "UPSERT", + "aspectName": "documentation", + "aspect": { + "value": "{\"documentations\":[{\"attribution\":{\"actor\":\"urn:li:corpuser:__datahub_system\",\"source\":\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_created,PROD),user_id)\",\"sourceDetail\":{\"actor\":\"urn:li:corpuser:shirshanka@acryl.io\",\"origin\":\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_created,PROD),user_id)\",\"propagated\":\"true\"},\"time\":1721422917808},\"documentation\":\"Unique identifier of user profile.\"}]}", + "contentType": "application/json" + }, + "systemMetadata": null } ] diff --git a/smoke-test/tests/openapi/README.md b/smoke-test/tests/openapi/README.md new file mode 100644 index 00000000000000..68e2a75a6509b9 --- /dev/null +++ b/smoke-test/tests/openapi/README.md @@ -0,0 +1,33 @@ + +# Goal + +This test is configuration driven by json files which contain request/response sequences intended to +detect unexpected regressions between releases. + +Files can be executed in parallel but each request within the file is sequential. + +## Adding a test + +Create a file for a given OpenAPI version which contains a list of request/response pairs in the following +format. 
+ +The `request` JSON object is translated into the Python request arguments, and the `response` object holds the +expected status code(s) and an optional body. + +```json +[ + { + "request": { + "url": "", + "description": "", + "method": "", + "json": {} + }, + "response": { + "status_codes": [200], + "exclude_regex_paths": [], + "json": {} + } + } +] +``` \ No newline at end of file diff --git a/docs-website/src/pages/docs/_components/CustomerCardSection/customercardsection.module.scss b/smoke-test/tests/openapi/__init__.py similarity index 100% rename from docs-website/src/pages/docs/_components/CustomerCardSection/customercardsection.module.scss rename to smoke-test/tests/openapi/__init__.py diff --git a/smoke-test/tests/openapi/test_openapi.py b/smoke-test/tests/openapi/test_openapi.py new file mode 100644 index 00000000000000..20398e0e581685 --- /dev/null +++ b/smoke-test/tests/openapi/test_openapi.py @@ -0,0 +1,113 @@ +import concurrent.futures +import glob +import json +import logging + +import pytest +from deepdiff import DeepDiff + +import requests_wrapper as requests +from tests.utils import get_gms_url + +logger = logging.getLogger(__name__) + + +@pytest.mark.dependency() +def test_healthchecks(wait_for_healthchecks): + # Call to wait_for_healthchecks fixture will do the actual functionality.
+ pass + + +def load_tests(fixture_glob="tests/openapi/**/*.json"): + for test_fixture in glob.glob(fixture_glob, recursive=True): + with open(test_fixture) as f: + yield (test_fixture, json.load(f)) + + +def execute_request(request): + """Send one fixture request to GMS and return the response; the method defaults to POST.""" + # Pop from a shallow copy so the caller's fixture dict is not mutated and the + # same loaded test data can still be inspected or executed again. + request = dict(request) + method = request.pop("method", "post") + url = get_gms_url() + request.pop("url") + + session = requests.Session() + return getattr(session, method)(url, **request) + + +def evaluate_test(test_name, test_data): + try: + for idx, req_resp in enumerate(test_data): + if "description" in req_resp["request"]: + description = req_resp["request"].pop("description") + else: + description = None + url = req_resp["request"]["url"] + actual_resp = execute_request(req_resp["request"]) + try: + if "response" in req_resp and "status_codes" in req_resp["response"]: + assert ( + actual_resp.status_code in req_resp["response"]["status_codes"] + ) + else: + assert actual_resp.status_code in [200, 202, 204] + if "response" in req_resp: + if "json" in req_resp["response"]: + if "exclude_regex_paths" in req_resp["response"]: + exclude_regex_paths = req_resp["response"][ + "exclude_regex_paths" + ] + else: + exclude_regex_paths = [] + diff = DeepDiff( + actual_resp.json(), + req_resp["response"]["json"], + exclude_regex_paths=exclude_regex_paths, + ignore_order=True, + ) + assert not diff + else: + logger.warning("No expected response json found") + except Exception: + logger.error( + f"Error executing step: {idx}, url: {url}, test: {test_name}" + ) + if description: + logger.error(f"Step {idx} Description: {description}") + logger.error(f"Response content: {actual_resp.content}") + raise + except Exception: + logger.error(f"Error executing test: {test_name}") + raise + + +def run_tests(fixture_globs, num_workers=3): + with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor: + futures = [] + for fixture_glob in fixture_globs: + for test_fixture, test_data in
load_tests(fixture_glob=fixture_glob): + futures.append(executor.submit(evaluate_test, test_fixture, test_data)) + + for future in concurrent.futures.as_completed(futures): + logger.info(future.result()) + + +@pytest.mark.dependency(depends=["test_healthchecks"]) +def test_openapi_all(): + run_tests(fixture_globs=["tests/openapi/*/*.json"], num_workers=10) + + +# @pytest.mark.dependency(depends=["test_healthchecks"]) +# def test_openapi_v1(): +# run_tests(fixture_glob="tests/openapi/v1/*.json", num_workers=4) +# +# +# @pytest.mark.dependency(depends=["test_healthchecks"]) +# def test_openapi_v2(): +# run_tests(fixture_glob="tests/openapi/v2/*.json", num_workers=4) +# +# +# @pytest.mark.dependency(depends=["test_healthchecks"]) +# def test_openapi_v3(): +# run_tests(fixture_glob="tests/openapi/v3/*.json", num_workers=4) diff --git a/smoke-test/tests/openapi/v1/__init__.py b/smoke-test/tests/openapi/v1/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/smoke-test/tests/openapi/v1/timeline.json b/smoke-test/tests/openapi/v1/timeline.json new file mode 100644 index 00000000000000..36459d1b9e8243 --- /dev/null +++ b/smoke-test/tests/openapi/v1/timeline.json @@ -0,0 +1,526 @@ +[ + { + "request": { + "url": "/openapi/v2/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atest%2CdatasetTimelineV1%2CPROD%29", + "description": "Remove test dataset", + "method": "delete" + } + }, + { + "request": { + "url": "/openapi/v2/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atest%2CdatasetTimelineV1%2CPROD%29/schemaMetadata?createIfNotExists=false", + "description": "Schema version 1", + "json": { + "schemaName": "db1.nested_struct_test", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": "urn:li:corpuser:jdoe" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": "urn:li:corpuser:jdoe" + }, + 
"hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "struct>", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"struct>\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "struct", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"struct\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=string].name", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "varchar(50)", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"varchar(50)\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id", + "nullable": true, + "description": "Service provider id", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "tinyint", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"tinyint\", 
\"_nullable\": true}" + } + ] + } + } + }, + { + "request": { + "url": "/openapi/v2/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atest%2CdatasetTimelineV1%2CPROD%29/schemaMetadata?createIfNotExists=false", + "description": "Schema version 2", + "json": { + "schemaName": "db1.nested_struct_test", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": "urn:li:corpuser:jdoe" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": "urn:li:corpuser:jdoe" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "struct>", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"struct>\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "struct", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"struct\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id3", + "description": "Service 
provider name", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "tinyint", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"tinyint\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id2", + "nullable": true, + "description": "Service provider id", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "tinyint", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"tinyint\", \"_nullable\": true}" + } + ] + } + } + }, + { + "request": { + "url": "/openapi/v2/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atest%2CdatasetTimelineV1%2CPROD%29/schemaMetadata?createIfNotExists=false", + "description": "Schema version 3", + "json": { + "schemaName": "db1.nested_struct_test", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": "urn:li:corpuser:jdoe" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": "urn:li:corpuser:jdoe" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "struct>", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"struct>\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + 
"com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "struct", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"struct\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=string].name", + "description": "Service provider name", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "varchar(50)", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"varchar(50)\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id", + "nullable": true, + "description": "Service provider id", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "tinyint", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"tinyint\", \"_nullable\": true}" + } + ] + } + } + }, + { + "request": { + "url": "/openapi/timeline/v1/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atest%2CdatasetTimelineV1%2CPROD%29?startTime=-1&endTime=0&raw=false&categories=TECHNICAL_SCHEMA", + "method": "get", + "description": "Get timeline response" + }, + "response": { + "exclude_regex_paths": [ + "root\\[.+?\\]\\['timestamp'\\]" + ], + "json": [ + { + "timestamp": 1723245258298, + "actor": "urn:li:corpuser:__datahub_system", + "semVer": "0.0.0-computed", + "semVerChange": "MINOR", + "changeEvents": [ + { + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD)", + "category": "TECHNICAL_SCHEMA", + "operation": 
"ADD", + "modifier": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service)", + "parameters": { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service", + "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service)", + "nullable": true + }, + "semVerChange": "MINOR", + "description": "A forwards & backwards compatible change due to the newly added field 'service'." + }, + { + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD)", + "category": "TECHNICAL_SCHEMA", + "operation": "ADD", + "modifier": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.type)", + "parameters": { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type", + "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.type)", + "nullable": true + }, + "semVerChange": "MINOR", + "description": "A forwards & backwards compatible change due to the newly added field 'service.type'." + }, + { + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD)", + "category": "TECHNICAL_SCHEMA", + "operation": "ADD", + "modifier": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider)", + "parameters": { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider", + "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider)", + "nullable": true + }, + "semVerChange": "MINOR", + "description": "A forwards & backwards compatible change due to the newly added field 'service.provider'." 
+ }, + { + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD)", + "category": "TECHNICAL_SCHEMA", + "operation": "ADD", + "modifier": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider.id)", + "parameters": { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id", + "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider.id)", + "nullable": true + }, + "semVerChange": "MINOR", + "description": "A forwards & backwards compatible change due to the newly added field 'service.provider.id'." + }, + { + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD)", + "category": "TECHNICAL_SCHEMA", + "operation": "ADD", + "modifier": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider.name)", + "parameters": { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=string].name", + "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider.name)", + "nullable": true + }, + "semVerChange": "MINOR", + "description": "A forwards & backwards compatible change due to the newly added field 'service.provider.name'." + }, + { + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD)", + "category": "TECHNICAL_SCHEMA", + "operation": "ADD", + "modifier": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),property_id)", + "parameters": { + "fieldPath": "property_id", + "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),property_id)", + "nullable": false + }, + "semVerChange": "MINOR", + "description": "A forwards & backwards compatible change due to the newly added field 'property_id'." 
+ } + ], + "versionStamp": "browsePathsV2:0;dataPlatformInstance:0;datasetKey:0;schemaMetadata:1" + }, + { + "timestamp": 1723245269788, + "actor": "urn:li:corpuser:__datahub_system", + "semVer": "1.0.0-computed", + "semVerChange": "MAJOR", + "changeEvents": [ + { + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD)", + "category": "TECHNICAL_SCHEMA", + "operation": "MODIFY", + "modifier": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider.id)", + "parameters": { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id", + "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider.id)", + "nullable": true + }, + "semVerChange": "MINOR", + "description": "A forwards & backwards compatible change due to renaming of the field 'service.provider.id to service.provider.id2'." + }, + { + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD)", + "category": "TECHNICAL_SCHEMA", + "operation": "ADD", + "modifier": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider.id3)", + "parameters": { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id3", + "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider.id3)", + "nullable": true + }, + "semVerChange": "MINOR", + "description": "A forwards & backwards compatible change due to the newly added field 'service.provider.id3'." 
+ }, + { + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD)", + "category": "TECHNICAL_SCHEMA", + "operation": "REMOVE", + "modifier": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider.name)", + "parameters": { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=string].name", + "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider.name)", + "nullable": true + }, + "semVerChange": "MAJOR", + "description": "A backwards incompatible change due to removal of field: 'service.provider.name'." + } + ], + "versionStamp": "browsePathsV2:0;dataPlatformInstance:0;datasetKey:0;schemaMetadata:2" + }, + { + "timestamp": 1723245279320, + "actor": "urn:li:corpuser:__datahub_system", + "semVer": "2.0.0-computed", + "semVerChange": "MAJOR", + "changeEvents": [ + { + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD)", + "category": "TECHNICAL_SCHEMA", + "operation": "MODIFY", + "modifier": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider.id2)", + "parameters": { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id2", + "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider.id2)", + "nullable": true + }, + "semVerChange": "MINOR", + "description": "A forwards & backwards compatible change due to renaming of the field 'service.provider.id2 to service.provider.id'." 
+ }, + { + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD)", + "category": "TECHNICAL_SCHEMA", + "operation": "REMOVE", + "modifier": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider.id3)", + "parameters": { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id3", + "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider.id3)", + "nullable": true + }, + "semVerChange": "MAJOR", + "description": "A backwards incompatible change due to removal of field: 'service.provider.id3'." + }, + { + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD)", + "category": "TECHNICAL_SCHEMA", + "operation": "ADD", + "modifier": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider.name)", + "parameters": { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=string].name", + "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider.name)", + "nullable": true + }, + "semVerChange": "MINOR", + "description": "A forwards & backwards compatible change due to the newly added field 'service.provider.name'." 
+ } + ], + "versionStamp": "browsePathsV2:0;dataPlatformInstance:0;datasetKey:0;schemaMetadata:0" + } + ] + } + } +] \ No newline at end of file diff --git a/smoke-test/tests/openapi/v2/__init__.py b/smoke-test/tests/openapi/v2/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/smoke-test/tests/openapi/v2/structured_properties.json b/smoke-test/tests/openapi/v2/structured_properties.json new file mode 100644 index 00000000000000..7eb67ffc8c320c --- /dev/null +++ b/smoke-test/tests/openapi/v2/structured_properties.json @@ -0,0 +1,332 @@ +[ + { + "request": { + "url": "/openapi/v2/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atest%2CdatasetStructPropV2%2CPROD%29", + "description": "Remove test dataset", + "method": "delete" + } + }, + { + "request": { + "url": "/openapi/v2/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.v2.retentionTime", + "description": "Remove test structured property", + "method": "delete" + } + }, + { + "request": { + "url": "/openapi/v2/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.v2.retentionTime02", + "description": "Remove test structured property #2", + "method": "delete" + } + }, + { + "request": { + "url": "/openapi/v2/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.v2.retentionTime/propertyDefinition", + "description": "Create structured property definition", + "params": { + "createIfNotExists": "false" + }, + "json": { + "qualifiedName": "io.acryl.privacy.v2.retentionTime", + "valueType": "urn:li:dataType:datahub.number", + "description": "Retention Time is used to figure out how long to retain records in a dataset", + "displayName": "Retention Time", + "cardinality": "MULTIPLE", + "entityTypes": [ + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.dataFlow" + ], + "allowedValues": [ + { + "value": { + "double": 30 + }, + "description": "30 days, usually reserved for datasets that are 
ephemeral and contain pii" + }, + { + "value": { + "double": 60 + }, + "description": "Use this for datasets that drive monthly reporting but contain pii" + }, + { + "value": { + "double": 365 + }, + "description": "Use this for non-sensitive data that can be retained for longer" + } + ] + } + }, + "response": { + "json": { + "urn": "urn:li:structuredProperty:io.acryl.privacy.v2.retentionTime", + "aspects": { + "propertyDefinition": { + "value": { + "allowedValues": [ + { + "value": { + "double": 30.0 + }, + "description": "30 days, usually reserved for datasets that are ephemeral and contain pii" + }, + { + "value": { + "double": 60.0 + }, + "description": "Use this for datasets that drive monthly reporting but contain pii" + }, + { + "value": { + "double": 365.0 + }, + "description": "Use this for non-sensitive data that can be retained for longer" + } + ], + "qualifiedName": "io.acryl.privacy.v2.retentionTime", + "displayName": "Retention Time", + "valueType": "urn:li:dataType:datahub.number", + "description": "Retention Time is used to figure out how long to retain records in a dataset", + "entityTypes": [ + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.dataFlow" + ], + "cardinality": "MULTIPLE" + } + } + } + } + } + }, + { + "request": { + "url": "/openapi/v2/entity/dataset?createIfNotExists=false&createEntityIfNotExists=false", + "description": "Create dataset", + "json": [ + { + "urn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetStructPropV2,PROD)", + "aspects": { + "status": { + "value": { + "removed": false + } + } + } + } + ] + }, + "response": { + "json": [ + { + "urn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetStructPropV2,PROD)" + } + ] + } + }, + { + "request": { + "url": "/openapi/v2/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atest%2CdatasetStructPropV2%2CPROD%29/structuredProperties?createIfNotExists=false", + "description": "Add structured property to dataset", + "json": { + "properties": [ + { + 
"propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.v2.retentionTime", + "values": [ + { + "double": 60.0 + } + ] + } + ] + } + }, + "response": { + "json": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetStructPropV2,PROD)", + "aspects": { + "structuredProperties": { + "value": { + "properties": [ + { + "values": [ + { + "double": 60.0 + } + ], + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.v2.retentionTime" + } + ] + } + } + } + } + } + }, + { + "request": { + "url": "/openapi/v2/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.v2.retentionTime02/propertyDefinition?createIfNotExists=false", + "description": "Create structured property definition #2", + "params": { + "createIfNotExists": "false" + }, + "json": { + "qualifiedName": "io.acryl.privacy.v2.retentionTime02", + "displayName": "Retention Time 02", + "valueType": "urn:li:dataType:datahub.string", + "allowedValues": [ + { + "value": { + "string": "foo2" + }, + "description": "test foo2 value" + }, + { + "value": { + "string": "bar2" + }, + "description": "test bar2 value" + } + ], + "cardinality": "SINGLE", + "entityTypes": [ + "urn:li:entityType:datahub.dataset" + ] + } + }, + "response": { + "json": { + "urn": "urn:li:structuredProperty:io.acryl.privacy.v2.retentionTime02", + "aspects": { + "propertyDefinition": { + "value": { + "allowedValues": [ + { + "description": "test foo2 value", + "value": { + "string": "foo2" + } + }, + { + "description": "test bar2 value", + "value": { + "string": "bar2" + } + } + ], + "entityTypes": [ + "urn:li:entityType:datahub.dataset" + ], + "cardinality": "SINGLE", + "displayName": "Retention Time 02", + "qualifiedName": "io.acryl.privacy.v2.retentionTime02", + "valueType": "urn:li:dataType:datahub.string" + } + } + } + } + } + }, + { + "request": { + "url": "/openapi/v2/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atest%2CdatasetStructPropV2%2CPROD%29/structuredProperties", + "description": 
"Patch ADD structured property", + "method": "patch", + "json": { + "patch": [ + { + "op": "add", + "path": "/properties/urn:li:structuredProperty:io.acryl.privacy.v2.retentionTime02", + "value": { + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.v2.retentionTime02", + "values": [ + { + "string": "bar2" + } + ] + } + } + ], + "arrayPrimaryKeys": { + "properties": [ + "propertyUrn" + ] + } + } + }, + "response": { + "json": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetStructPropV2,PROD)", + "aspects": { + "structuredProperties": { + "value": { + "properties": [ + { + "values": [ + { + "double": 60.0 + } + ], + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.v2.retentionTime" + }, + { + "values": [ + { + "string": "bar2" + } + ], + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.v2.retentionTime02" + } + ] + } + } + } + } + } + }, + { + "request": { + "url": "/openapi/v2/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atest%2CdatasetStructPropV2%2CPROD%29/structuredProperties", + "description": "Patch REMOVE structured property", + "method": "patch", + "json": { + "patch": [ + { + "op": "remove", + "path": "/properties/urn:li:structuredProperty:io.acryl.privacy.v2.retentionTime02", + "value": { + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.v2.retentionTime02" + } + } + ], + "arrayPrimaryKeys": { + "properties": [ + "propertyUrn" + ] + } + } + }, + "response": { + "json": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetStructPropV2,PROD)", + "aspects": { + "structuredProperties": { + "value": { + "properties": [ + { + "values": [ + { + "double": 60.0 + } + ], + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.v2.retentionTime" + } + ] + } + } + } + } + } + } +] \ No newline at end of file diff --git a/smoke-test/tests/openapi/v2/timeline.json b/smoke-test/tests/openapi/v2/timeline.json new file mode 100644 index 00000000000000..ceee67b39a6d0f --- /dev/null +++ 
b/smoke-test/tests/openapi/v2/timeline.json @@ -0,0 +1,526 @@ +[ + { + "request": { + "url": "/openapi/v2/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atest%2CdatasetTimelineV2%2CPROD%29", + "description": "Remove test dataset", + "method": "delete" + } + }, + { + "request": { + "url": "/openapi/v2/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atest%2CdatasetTimelineV2%2CPROD%29/schemaMetadata?createIfNotExists=false", + "description": "Schema version 1", + "json": { + "schemaName": "db1.nested_struct_test", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": "urn:li:corpuser:jdoe" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": "urn:li:corpuser:jdoe" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "struct>", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"struct>\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "struct", + 
"recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"struct\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=string].name", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "varchar(50)", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"varchar(50)\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id", + "nullable": true, + "description": "Service provider id", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "tinyint", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"tinyint\", \"_nullable\": true}" + } + ] + } + } + }, + { + "request": { + "url": "/openapi/v2/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atest%2CdatasetTimelineV2%2CPROD%29/schemaMetadata?createIfNotExists=false", + "description": "Schema version 2", + "json": { + "schemaName": "db1.nested_struct_test", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": "urn:li:corpuser:jdoe" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": "urn:li:corpuser:jdoe" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "struct>", + "recursive": false, + "isPartOfKey": false, + "jsonProps": 
"{\"native_data_type\": \"struct>\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "struct", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"struct\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id3", + "description": "Service provider name", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "tinyint", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"tinyint\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id2", + "nullable": true, + "description": "Service provider id", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "tinyint", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"tinyint\", \"_nullable\": true}" + } + ] + } + } + }, + { + "request": { + "url": "/openapi/v2/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atest%2CdatasetTimelineV2%2CPROD%29/schemaMetadata?createIfNotExists=false", + "description": "Schema version 3", + "json": { + "schemaName": "db1.nested_struct_test", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": "urn:li:corpuser:jdoe" + }, + "lastModified": { + "time": 0, + 
"actor": "urn:li:corpuser:unknown", + "impersonator": "urn:li:corpuser:jdoe" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "struct>", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"struct>\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "struct", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"struct\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=string].name", + "description": "Service provider name", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "varchar(50)", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"varchar(50)\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id", + "nullable": true, + "description": "Service provider id", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + 
"nativeDataType": "tinyint", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"tinyint\", \"_nullable\": true}" + } + ] + } + } + }, + { + "request": { + "url": "/openapi/v2/timeline/v1/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atest%2CdatasetTimelineV2%2CPROD%29?startTime=-1&endTime=0&raw=false&categories=TECHNICAL_SCHEMA", + "method": "get", + "description": "Get timeline response" + }, + "response": { + "exclude_regex_paths": [ + "root\\[.+?\\]\\['timestamp'\\]" + ], + "json": [ + { + "timestamp": 1723245258298, + "actor": "urn:li:corpuser:__datahub_system", + "semVer": "0.0.0-computed", + "semVerChange": "MINOR", + "changeEvents": [ + { + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD)", + "category": "TECHNICAL_SCHEMA", + "operation": "ADD", + "modifier": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service)", + "parameters": { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service", + "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service)", + "nullable": true + }, + "semVerChange": "MINOR", + "description": "A forwards & backwards compatible change due to the newly added field 'service'." + }, + { + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD)", + "category": "TECHNICAL_SCHEMA", + "operation": "ADD", + "modifier": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.type)", + "parameters": { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type", + "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.type)", + "nullable": true + }, + "semVerChange": "MINOR", + "description": "A forwards & backwards compatible change due to the newly added field 'service.type'." 
+ }, + { + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD)", + "category": "TECHNICAL_SCHEMA", + "operation": "ADD", + "modifier": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.provider)", + "parameters": { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider", + "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.provider)", + "nullable": true + }, + "semVerChange": "MINOR", + "description": "A forwards & backwards compatible change due to the newly added field 'service.provider'." + }, + { + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD)", + "category": "TECHNICAL_SCHEMA", + "operation": "ADD", + "modifier": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.provider.id)", + "parameters": { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id", + "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.provider.id)", + "nullable": true + }, + "semVerChange": "MINOR", + "description": "A forwards & backwards compatible change due to the newly added field 'service.provider.id'." 
+ }, + { + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD)", + "category": "TECHNICAL_SCHEMA", + "operation": "ADD", + "modifier": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.provider.name)", + "parameters": { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=string].name", + "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.provider.name)", + "nullable": true + }, + "semVerChange": "MINOR", + "description": "A forwards & backwards compatible change due to the newly added field 'service.provider.name'." + }, + { + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD)", + "category": "TECHNICAL_SCHEMA", + "operation": "ADD", + "modifier": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),property_id)", + "parameters": { + "fieldPath": "property_id", + "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),property_id)", + "nullable": false + }, + "semVerChange": "MINOR", + "description": "A forwards & backwards compatible change due to the newly added field 'property_id'." 
+ } + ], + "versionStamp": "browsePathsV2:0;dataPlatformInstance:0;datasetKey:0;schemaMetadata:1" + }, + { + "timestamp": 1723245269788, + "actor": "urn:li:corpuser:__datahub_system", + "semVer": "1.0.0-computed", + "semVerChange": "MAJOR", + "changeEvents": [ + { + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD)", + "category": "TECHNICAL_SCHEMA", + "operation": "MODIFY", + "modifier": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.provider.id)", + "parameters": { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id", + "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.provider.id)", + "nullable": true + }, + "semVerChange": "MINOR", + "description": "A forwards & backwards compatible change due to renaming of the field 'service.provider.id to service.provider.id2'." + }, + { + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD)", + "category": "TECHNICAL_SCHEMA", + "operation": "ADD", + "modifier": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.provider.id3)", + "parameters": { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id3", + "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.provider.id3)", + "nullable": true + }, + "semVerChange": "MINOR", + "description": "A forwards & backwards compatible change due to the newly added field 'service.provider.id3'." 
+ }, + { + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD)", + "category": "TECHNICAL_SCHEMA", + "operation": "REMOVE", + "modifier": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.provider.name)", + "parameters": { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=string].name", + "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.provider.name)", + "nullable": true + }, + "semVerChange": "MAJOR", + "description": "A backwards incompatible change due to removal of field: 'service.provider.name'." + } + ], + "versionStamp": "browsePathsV2:0;dataPlatformInstance:0;datasetKey:0;schemaMetadata:2" + }, + { + "timestamp": 1723245279320, + "actor": "urn:li:corpuser:__datahub_system", + "semVer": "2.0.0-computed", + "semVerChange": "MAJOR", + "changeEvents": [ + { + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD)", + "category": "TECHNICAL_SCHEMA", + "operation": "MODIFY", + "modifier": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.provider.id2)", + "parameters": { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id2", + "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.provider.id2)", + "nullable": true + }, + "semVerChange": "MINOR", + "description": "A forwards & backwards compatible change due to renaming of the field 'service.provider.id2 to service.provider.id'." 
+ }, + { + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD)", + "category": "TECHNICAL_SCHEMA", + "operation": "REMOVE", + "modifier": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.provider.id3)", + "parameters": { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id3", + "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.provider.id3)", + "nullable": true + }, + "semVerChange": "MAJOR", + "description": "A backwards incompatible change due to removal of field: 'service.provider.id3'." + }, + { + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD)", + "category": "TECHNICAL_SCHEMA", + "operation": "ADD", + "modifier": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.provider.name)", + "parameters": { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=string].name", + "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.provider.name)", + "nullable": true + }, + "semVerChange": "MINOR", + "description": "A forwards & backwards compatible change due to the newly added field 'service.provider.name'." 
+ } + ], + "versionStamp": "browsePathsV2:0;dataPlatformInstance:0;datasetKey:0;schemaMetadata:0" + } + ] + } + } +] \ No newline at end of file diff --git a/smoke-test/tests/openapi/v3/__init__.py b/smoke-test/tests/openapi/v3/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/smoke-test/tests/openapi/v3/structured_properties.json b/smoke-test/tests/openapi/v3/structured_properties.json new file mode 100644 index 00000000000000..b000c5da0a2838 --- /dev/null +++ b/smoke-test/tests/openapi/v3/structured_properties.json @@ -0,0 +1,331 @@ +[ + { + "request": { + "url": "/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atest%2CdatasetStructPropV3%2CPROD%29", + "description": "Remove test dataset", + "method": "delete" + } + }, + { + "request": { + "url": "/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.v3.retentionTime", + "description": "Remove test structured property", + "method": "delete" + } + }, + { + "request": { + "url": "/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.v3.retentionTime02", + "description": "Remove test structured property #2", + "method": "delete" + } + }, + { + "request": { + "url": "/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.v3.retentionTime/propertyDefinition", + "description": "Create structured property definition", + "params": { + "createIfNotExists": "false" + }, + "json": { + "value": { + "qualifiedName": "io.acryl.privacy.v3.retentionTime", + "valueType": "urn:li:dataType:datahub.number", + "description": "Retention Time is used to figure out how long to retain records in a dataset", + "displayName": "Retention Time", + "cardinality": "MULTIPLE", + "entityTypes": [ + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.dataFlow" + ], + "allowedValues": [ + { + "value": { + "double": 30 + }, + "description": "30 days, usually reserved for 
datasets that are ephemeral and contain pii" + }, + { + "value": { + "double": 60 + }, + "description": "Use this for datasets that drive monthly reporting but contain pii" + }, + { + "value": { + "double": 365 + }, + "description": "Use this for non-sensitive data that can be retained for longer" + } + ] + } + } + }, + "response": { + "json": { + "urn": "urn:li:structuredProperty:io.acryl.privacy.v3.retentionTime", + "propertyDefinition": { + "value": { + "allowedValues": [ + { + "value": { + "double": 30.0 + }, + "description": "30 days, usually reserved for datasets that are ephemeral and contain pii" + }, + { + "value": { + "double": 60.0 + }, + "description": "Use this for datasets that drive monthly reporting but contain pii" + }, + { + "value": { + "double": 365.0 + }, + "description": "Use this for non-sensitive data that can be retained for longer" + } + ], + "qualifiedName": "io.acryl.privacy.v3.retentionTime", + "displayName": "Retention Time", + "valueType": "urn:li:dataType:datahub.number", + "description": "Retention Time is used to figure out how long to retain records in a dataset", + "entityTypes": [ + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.dataFlow" + ], + "cardinality": "MULTIPLE" + } + } + } + } + }, + { + "request": { + "url": "/openapi/v3/entity/dataset?createIfNotExists=false&createEntityIfNotExists=false", + "description": "Create dataset", + "json": [ + { + "urn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetStructPropV3,PROD)", + "status": { + "value": { + "removed": false + } + } + } + ] + }, + "response": { + "json": [ + { + "urn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetStructPropV3,PROD)", + "status": { + "value": { + "removed": false + } + } + } + ] + } + }, + { + "request": { + "url": "/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atest%2CdatasetStructPropV3%2CPROD%29/structuredProperties?createIfNotExists=false", + "description": "Add structured property to 
dataset", + "json": { + "value": { + "properties": [ + { + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.v3.retentionTime", + "values": [ + { + "double": 60.0 + } + ] + } + ] + } + } + }, + "response": { + "json": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetStructPropV3,PROD)", + "structuredProperties": { + "value": { + "properties": [ + { + "values": [ + { + "double": 60.0 + } + ], + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.v3.retentionTime" + } + ] + } + } + } + } + }, + { + "request": { + "url": "/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.v3.retentionTime02/propertyDefinition?createIfNotExists=false", + "description": "Create structured property definition #2", + "params": { + "createIfNotExists": "false" + }, + "json": { + "value": { + "qualifiedName": "io.acryl.privacy.v3.retentionTime02", + "displayName": "Retention Time 02", + "valueType": "urn:li:dataType:datahub.string", + "allowedValues": [ + { + "value": { + "string": "foo2" + }, + "description": "test foo2 value" + }, + { + "value": { + "string": "bar2" + }, + "description": "test bar2 value" + } + ], + "cardinality": "SINGLE", + "entityTypes": [ + "urn:li:entityType:datahub.dataset" + ] + } + } + }, + "response": { + "json": { + "urn": "urn:li:structuredProperty:io.acryl.privacy.v3.retentionTime02", + "propertyDefinition": { + "value": { + "allowedValues": [ + { + "description": "test foo2 value", + "value": { + "string": "foo2" + } + }, + { + "description": "test bar2 value", + "value": { + "string": "bar2" + } + } + ], + "entityTypes": [ + "urn:li:entityType:datahub.dataset" + ], + "cardinality": "SINGLE", + "displayName": "Retention Time 02", + "qualifiedName": "io.acryl.privacy.v3.retentionTime02", + "valueType": "urn:li:dataType:datahub.string" + } + } + } + } + }, + { + "request": { + "url": 
"/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atest%2CdatasetStructPropV3%2CPROD%29/structuredProperties", + "description": "Patch ADD structured property", + "method": "patch", + "json": { + "patch": [ + { + "op": "add", + "path": "/properties/urn:li:structuredProperty:io.acryl.privacy.v3.retentionTime02", + "value": { + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.v3.retentionTime02", + "values": [ + { + "string": "bar2" + } + ] + } + } + ], + "arrayPrimaryKeys": { + "properties": [ + "propertyUrn" + ] + } + } + }, + "response": { + "json": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetStructPropV3,PROD)", + "structuredProperties": { + "value": { + "properties": [ + { + "values": [ + { + "double": 60.0 + } + ], + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.v3.retentionTime" + }, + { + "values": [ + { + "string": "bar2" + } + ], + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.v3.retentionTime02" + } + ] + } + } + } + } + }, + { + "request": { + "url": "/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atest%2CdatasetStructPropV3%2CPROD%29/structuredProperties", + "description": "Patch REMOVE structured property", + "method": "patch", + "json": { + "patch": [ + { + "op": "remove", + "path": "/properties/urn:li:structuredProperty:io.acryl.privacy.v3.retentionTime02", + "value": { + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.v3.retentionTime02" + } + } + ], + "arrayPrimaryKeys": { + "properties": [ + "propertyUrn" + ] + } + } + }, + "response": { + "json": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetStructPropV3,PROD)", + "structuredProperties": { + "value": { + "properties": [ + { + "values": [ + { + "double": 60.0 + } + ], + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.v3.retentionTime" + } + ] + } + } + } + } + } +] \ No newline at end of file diff --git a/smoke-test/tests/privileges/test_privileges.py 
b/smoke-test/tests/privileges/test_privileges.py index c9a0b621593148..bce7b8a238c385 100644 --- a/smoke-test/tests/privileges/test_privileges.py +++ b/smoke-test/tests/privileges/test_privileges.py @@ -4,11 +4,13 @@ from tests.privileges.utils import ( assign_role, assign_user_to_group, + clear_polices, create_group, create_user, create_user_policy, remove_group, remove_policy, + remove_secret, remove_user, set_base_platform_privileges_policy_status, set_view_dataset_sensitive_info_policy_status, @@ -65,6 +67,12 @@ def privileges_and_test_user_setup(admin_session): # Remove test user remove_user(admin_session, "urn:li:corpuser:user") + # Remove secret + remove_secret(admin_session, "urn:li:dataHubSecret:TestSecretName") + + # Remove test policies + clear_polices(admin_session) + # Restore All users privileges set_base_platform_privileges_policy_status("ACTIVE", admin_session) set_view_dataset_sensitive_info_policy_status("ACTIVE", admin_session) diff --git a/smoke-test/tests/privileges/utils.py b/smoke-test/tests/privileges/utils.py index 1e58ec4085b703..72ad94a42a4627 100644 --- a/smoke-test/tests/privileges/utils.py +++ b/smoke-test/tests/privileges/utils.py @@ -246,8 +246,8 @@ def create_user_policy(user_urn, privileges, session): "variables": { "input": { "type": "PLATFORM", - "name": "Policy Name", - "description": "Policy Description", + "name": "Test Policy Name", + "description": "Test Policy Description", "state": "ACTIVE", "resources": {"filter": {"criteria": []}}, "privileges": privileges, @@ -288,3 +288,69 @@ def remove_policy(urn, session): assert res_data["data"] assert res_data["data"]["deletePolicy"] assert res_data["data"]["deletePolicy"] == urn + + +def clear_polices(session): + list_policy_json = { + "query": """query listPolicies($input: ListPoliciesInput!) 
{ + listPolicies(input: $input) { + start + count + total + policies { + urn + editable + name + description + __typename + } + __typename + } + }""", + "variables": { + "input": { + "count": 100, + "start": 0, + "orFilters": [ + { + "and": [ + { + "field": "state", + "values": ["ACTIVE"], + "condition": "EQUAL", + }, + { + "field": "editable", + "values": ["true"], + "condition": "EQUAL", + }, + ] + } + ], + } + }, + } + + response = session.post( + f"{get_frontend_url()}/api/v2/graphql", json=list_policy_json + ) + response.raise_for_status() + res_data = response.json() + + assert res_data + assert res_data["data"] + assert res_data["data"]["listPolicies"] + for policy in res_data["data"]["listPolicies"]["policies"]: + if "test" in policy["name"].lower() or "test" in policy["description"].lower(): + remove_policy(policy["urn"], session) + + +def remove_secret(session, urn): + remove_secret = { + "query": """mutation deleteSecret($urn: String!) {\n + deleteSecret(urn: $urn)\n}""", + "variables": {"urn": urn}, + } + + response = session.post(f"{get_frontend_url()}/api/v2/graphql", json=remove_secret) + response.raise_for_status() diff --git a/smoke-test/tests/read_only/test_search.py b/smoke-test/tests/read_only/test_search.py index 90385c5228bc1a..3b9635f3da2cd5 100644 --- a/smoke-test/tests/read_only/test_search.py +++ b/smoke-test/tests/read_only/test_search.py @@ -1,10 +1,13 @@ import pytest +import requests from tests.test_result_msg import add_datahub_stats -from tests.utils import get_frontend_session, get_frontend_url +from tests.utils import get_frontend_session, get_frontend_url, get_gms_url -restli_default_headers = { - "X-RestLi-Protocol-Version": "2.0.0", +BASE_URL_V3 = f"{get_gms_url()}/openapi/v3" + +default_headers = { + "Content-Type": "application/json", } ENTITY_TO_MAP = { @@ -59,16 +62,8 @@ def _get_search_result(frontend_session, entity: str): ("chart", "chart"), ("dataset", "dataset"), ("dashboard", "dashboard"), - ( - # Task - "dataJob", - 
"dataJob", - ), - ( - # Pipeline - "dataFlow", - "dataFlow", - ), + ("dataJob", "dataJob"), + ("dataFlow", "dataFlow"), ("container", "container"), ("tag", "tag"), ("corpUser", "corpUser"), @@ -78,11 +73,7 @@ def _get_search_result(frontend_session, entity: str): ("mlPrimaryKey", "mlPrimaryKey"), ("corpGroup", "corpGroup"), ("mlFeatureTable", "mlFeatureTable"), - ( - # Term group - "glossaryNode", - "glossaryNode", - ), + ("glossaryNode", "glossaryNode"), ("mlModel", "mlModel"), ], ) @@ -112,8 +103,56 @@ def test_search_works(entity_type, api_name): """, "variables": {"input": first_urn}, } + response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() res_data = response.json() assert res_data["data"], f"res_data was {res_data}" assert res_data["data"][api_name]["urn"] == first_urn, f"res_data was {res_data}" + + +@pytest.mark.read_only +@pytest.mark.parametrize( + "entity_type", + [ + "chart", + "dataset", + "dashboard", + "dataJob", + "dataFlow", + "container", + "tag", + "corpUser", + "mlFeature", + "glossaryTerm", + "domain", + "mlPrimaryKey", + "corpGroup", + "mlFeatureTable", + "glossaryNode", + "mlModel", + ], +) +def test_openapi_v3_entity(entity_type): + frontend_session = get_frontend_session() + search_result = _get_search_result(frontend_session, entity_type) + num_entities = search_result["total"] + if num_entities == 0: + print(f"[WARN] No results for {entity_type}") + return + entities = search_result["searchResults"] + + first_urn = entities[0]["entity"]["urn"] + + session = requests.Session() + url = f"{BASE_URL_V3}/entity/{entity_type}/{first_urn}" + response = session.get(url, headers=default_headers) + response.raise_for_status() + actual_data = response.json() + print(f"Entity Data for URN {first_urn}: {actual_data}") + + expected_data = {"urn": first_urn} + + assert ( + actual_data["urn"] == expected_data["urn"] + ), f"Mismatch: expected {expected_data}, got {actual_data}"