diff --git a/.github/actions/ci-optimization/action.yml b/.github/actions/ci-optimization/action.yml index 2f677a0e552c23..ff901b5de04b65 100644 --- a/.github/actions/ci-optimization/action.yml +++ b/.github/actions/ci-optimization/action.yml @@ -1,5 +1,5 @@ -name: 'Identify CI Optimizations' -description: 'Determine if code changes are specific to certain modules.' +name: "Identify CI Optimizations" +description: "Determine if code changes are specific to certain modules." outputs: frontend-only: @@ -44,27 +44,25 @@ outputs: runs: using: "composite" steps: - - uses: dorny/paths-filter@v2 + - uses: dorny/paths-filter@v3 id: filter with: + token: "" # Empty token forces it to use raw git commands. filters: | frontend: - "datahub-frontend/**" - "datahub-web-react/**" - - "smoke-test/tests/cypress/**" - "docker/datahub-frontend/**" ingestion: - "metadata-ingestion-modules/**" - "metadata-ingestion/**" - "metadata-models/**" - - "smoke-test/**" - "docker/datahub-ingestion**" ingestion-base: - "docker/datahub-ingestion-base/**" docker: - "docker/**" backend: - - ".github/**" - "metadata-models/**" - "datahub-upgrade/**" - "entity-registry/**" @@ -78,7 +76,6 @@ runs: - "metadata-utils/**" - "metadata-operation-context/**" - "datahub-graphql-core/**" - - "smoke-test/**" - "docker/**" kafka-setup: - "docker/kafka-setup/**" diff --git a/.github/actions/docker-custom-build-and-push/action.yml b/.github/actions/docker-custom-build-and-push/action.yml index 1c4a777c14802a..3805b3501ccec0 100644 --- a/.github/actions/docker-custom-build-and-push/action.yml +++ b/.github/actions/docker-custom-build-and-push/action.yml @@ -26,10 +26,13 @@ inputs: build-args: description: "List of build-time variables. Same as docker/build-push-action" required: false - tags: - # e.g. latest,head,sha12345 - description: "List of tags to use for the Docker image" + image_tag: + # e.g. 
pr12345 OR head OR v0.1.2.3 + description: "Main tag to use for the Docker image" required: true + flavor: + description: 'Image flavor (e.g., slim, full)' + required: false target: description: "Sets the target stage to build" required: false @@ -45,13 +48,16 @@ runs: steps: - name: Docker meta id: docker_meta - uses: crazy-max/ghaction-docker-meta@v1 + uses: docker/metadata-action@v5 with: - # list of Docker images to use as base name for tags images: ${{ inputs.images }} - # add git short SHA as Docker tag - tag-custom: ${{ inputs.tags }} - tag-custom-only: true + flavor: | + latest=false + suffix=${{ inputs.flavor && format('-{0}', inputs.flavor) || '' }} + tags: | + type=raw,value=${{ inputs.image_tag }} + type=raw,value=head,enable={{is_default_branch}} + type=sha,prefix=,format=short # Code for testing the build when not pushing to Docker Hub. - name: Build and Load image for testing (if not publishing) @@ -74,10 +80,13 @@ runs: if: ${{ inputs.publish != 'true' }} shell: bash run: | + IMAGES=""" + ${{ inputs.images }} + """ TAGS=""" - ${{ steps.docker_meta.outputs.tags }} + ${{ inputs.image_tag }} """ - echo "SINGLE_TAG=$(echo $TAGS | tr '\n' ' ' | awk -F' ' '{ print $1 }')" >> $GITHUB_OUTPUT + echo "SINGLE_TAG=$(echo $IMAGES | tr '\n' ' ' | awk -F' |,' '{ print $1 }'):$(echo $TAGS | tr '\n' ' ' | awk -F' |,' '{ print $1 }')" >> $GITHUB_OUTPUT id: single_tag - name: Upload image locally for testing (if not publishing) uses: ishworkh/docker-image-artifact-upload@v1 diff --git a/.github/scripts/docker_helpers.sh b/.github/scripts/docker_helpers.sh index e031a6d2a4d843..138c8649820ec5 100755 --- a/.github/scripts/docker_helpers.sh +++ b/.github/scripts/docker_helpers.sh @@ -5,22 +5,22 @@ export MAIN_BRANCH="master" export MAIN_BRANCH_TAG="head" function get_short_sha { - echo $(git rev-parse --short "$GITHUB_SHA") + echo $(git rev-parse --short "$GITHUB_SHA"|head -c7) } export SHORT_SHA=$(get_short_sha) echo "SHORT_SHA: $SHORT_SHA" function get_tag { - echo 
$(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG},g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1,g'),${SHORT_SHA} + echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG},g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1,g') } function get_tag_slim { - echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-slim,g" -e 's,refs/tags/\(.*\),\1-slim,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-slim,g'),${SHORT_SHA}-slim + echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-slim,g" -e 's,refs/tags/\(.*\),\1-slim,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-slim,g') } function get_tag_full { - echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-full,g" -e 's,refs/tags/\(.*\),\1-full,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g'),${SHORT_SHA}-full + echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-full,g" -e 's,refs/tags/\(.*\),\1-full,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g') } function get_python_docker_release_v { @@ -38,3 +38,11 @@ function get_unique_tag_slim { function get_unique_tag_full { echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${SHORT_SHA}-full,g" -e 's,refs/tags/\(.*\),\1-full,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g') } + +function get_platforms_based_on_branch { + if [[ "${GITHUB_EVENT_NAME:-}" == 'push' && "${GITHUB_REF:-}" == "refs/heads/${MAIN_BRANCH}" ]]; then # multi-arch only for pushes to the main branch; uses runner env vars because Actions expressions are not expanded inside script files + echo "linux/amd64,linux/arm64" + else + echo "linux/amd64" + fi +} diff --git a/.github/scripts/docker_logs.sh b/.github/scripts/docker_logs.sh new file mode 100644 index 00000000000000..918b859fbe5b1d --- /dev/null +++ b/.github/scripts/docker_logs.sh @@ -0,0 +1,8 @@ +TARGET_DIR="${TARGET_DIR:=docker_logs}" +TEST_STRATEGY="${TEST_STRATEGY:=}" + +mkdir -p "$TARGET_DIR" +for name in `docker ps -a --format '{{.Names}}'`; +do + docker logs "$name" >&
"${TARGET_DIR}/${name}${TEST_STRATEGY}.log" || true +done \ No newline at end of file diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index c93267947b65a8..3eb34eca85a46f 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -57,6 +57,11 @@ jobs: timeout-minutes: 60 needs: setup steps: + - name: Free up disk space + run: | + sudo apt-get remove 'dotnet-*' azure-cli || true + sudo rm -rf /usr/local/lib/android/ || true + sudo docker image prune -a -f || true - uses: szenius/set-timezone@v1.2 with: timezoneLinux: ${{ matrix.timezone }} @@ -86,6 +91,8 @@ jobs: -x :metadata-ingestion-modules:airflow-plugin:check \ -x :metadata-ingestion-modules:dagster-plugin:build \ -x :metadata-ingestion-modules:dagster-plugin:check \ + -x :metadata-ingestion-modules:gx-plugin:build \ + -x :metadata-ingestion-modules:gx-plugin:check \ -x :datahub-frontend:build \ -x :datahub-web-react:build \ --parallel diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index 9487e71e8da3d1..32e68a76a88f5a 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -47,7 +47,6 @@ jobs: publish: ${{ steps.publish.outputs.publish }} pr-publish: ${{ steps.pr-publish.outputs.publish }} python_release_version: ${{ steps.tag.outputs.python_release_version }} - short_sha: ${{ steps.tag.outputs.short_sha }} branch_name: ${{ steps.tag.outputs.branch_name }} repository_name: ${{ steps.tag.outputs.repository_name }} frontend_change: ${{ steps.ci-optimize.outputs.frontend-change == 'true' }} @@ -61,6 +60,7 @@ jobs: mysql_setup_change: ${{ steps.ci-optimize.outputs.mysql-setup-change == 'true' }} postgres_setup_change: ${{ steps.ci-optimize.outputs.postgres-setup-change == 'true' }} elasticsearch_setup_change: ${{ steps.ci-optimize.outputs.elasticsearch-setup-change == 'true' }} + smoke_test_change: ${{ steps.ci-optimize.outputs.smoke-test-change == 'true' }} 
steps: - name: Check out the repo uses: acryldata/sane-checkout-action@v3 @@ -157,7 +157,7 @@ jobs: with: images: | ${{ env.DATAHUB_GMS_IMAGE }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -221,7 +221,7 @@ jobs: with: images: | ${{ env.DATAHUB_MAE_CONSUMER_IMAGE }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -285,7 +285,7 @@ jobs: with: images: | ${{ env.DATAHUB_MCE_CONSUMER_IMAGE }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -349,7 +349,7 @@ jobs: with: images: | ${{ env.DATAHUB_UPGRADE_IMAGE }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -394,7 +394,7 @@ jobs: name: Build and Push DataHub Frontend Docker Image runs-on: ubuntu-latest needs: setup - if: ${{ needs.setup.outputs.frontend_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.frontend_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true'}} steps: - name: Set up JDK 17 uses: actions/setup-java@v3 @@ -415,7 +415,7 @@ jobs: with: images: | ${{ env.DATAHUB_FRONTEND_IMAGE }} - tags: ${{ 
needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -469,7 +469,7 @@ jobs: with: images: | ${{ env.DATAHUB_KAFKA_SETUP_IMAGE }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -490,7 +490,7 @@ jobs: with: images: | ${{ env.DATAHUB_MYSQL_SETUP_IMAGE }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -502,7 +502,7 @@ jobs: name: Build and Push DataHub Elasticsearch Setup Docker Image runs-on: ubuntu-latest needs: setup - if: ${{ needs.setup.outputs.elasticsearch_setup_change == 'true' || (needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true') }} + if: ${{ needs.setup.outputs.elasticsearch_setup_change == 'true' || (needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' ) }} steps: - name: Check out the repo uses: acryldata/sane-checkout-action@v3 @@ -511,7 +511,7 @@ jobs: with: images: | ${{ env.DATAHUB_ELASTIC_SETUP_IMAGE }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -525,7 +525,7 @@ jobs: outputs: tag: ${{ steps.tag.outputs.tag }} needs: setup - if: ${{ needs.setup.outputs.ingestion_change == 'true' || 
needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} steps: - name: Check out the repo uses: acryldata/sane-checkout-action@v3 @@ -536,7 +536,7 @@ jobs: target: base images: | ${{ env.DATAHUB_INGESTION_BASE_IMAGE }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -552,7 +552,7 @@ jobs: outputs: tag: ${{ steps.tag.outputs.tag }} needs: [setup, datahub_ingestion_base_build] - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} steps: - name: Check out the repo uses: acryldata/sane-checkout-action@v3 @@ -574,7 +574,7 @@ jobs: target: slim-install images: | ${{ env.DATAHUB_INGESTION_BASE_IMAGE }} - tags: ${{ needs.setup.outputs.slim_tag }} + image_tag: ${{ needs.setup.outputs.slim_tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} build-args: | @@ -593,7 +593,7 @@ jobs: outputs: tag: ${{ steps.tag.outputs.tag }} needs: [setup, datahub_ingestion_base_build] - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} steps: - name: Check out the repo uses: acryldata/sane-checkout-action@v3 @@ -636,7 +636,7 @@ jobs: tag: ${{ steps.tag.outputs.tag }} needs_artifact_download: ${{ needs.setup.outputs.ingestion_change == 'true' && ( needs.setup.outputs.publish != 'true' && 
needs.setup.outputs.pr-publish != 'true') }} needs: [setup, datahub_ingestion_base_slim_build] - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} steps: - name: Set up JDK 17 uses: actions/setup-java@v3 @@ -647,7 +647,7 @@ jobs: - name: Check out the repo uses: acryldata/sane-checkout-action@v3 - name: Build codegen - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish =='true' }} run: ./gradlew :metadata-ingestion:codegen - name: Download Base Image uses: ishworkh/docker-image-artifact-download@v1 @@ -661,7 +661,7 @@ jobs: username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} - name: Build and push Slim Image - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} uses: ./.github/actions/docker-custom-build-and-push with: target: final @@ -672,7 +672,7 @@ jobs: DOCKER_VERSION=${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }} RELEASE_VERSION=${{ needs.setup.outputs.python_release_version }} APP_ENV=slim - tags: ${{ needs.setup.outputs.slim_tag }} + image_tag: ${{ needs.setup.outputs.slim_tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -723,7 +723,7 @@ jobs: tag: ${{ steps.tag.outputs.tag }} needs_artifact_download: ${{ needs.setup.outputs.ingestion_change == 
'true' && ( needs.setup.outputs.publish != 'true' && needs.setup.outputs.pr-publish != 'true' ) }} needs: [setup, datahub_ingestion_base_full_build] - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} steps: - name: Set up JDK 17 uses: actions/setup-java@v3 @@ -734,7 +734,7 @@ jobs: - name: Check out the repo uses: acryldata/sane-checkout-action@v3 - name: Build codegen - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} run: ./gradlew :metadata-ingestion:codegen - name: Download Base Image uses: ishworkh/docker-image-artifact-download@v1 @@ -748,7 +748,7 @@ jobs: username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} - name: Build and push Full Image - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} uses: ./.github/actions/docker-custom-build-and-push with: target: final @@ -758,7 +758,7 @@ jobs: BASE_IMAGE=${{ env.DATAHUB_INGESTION_BASE_IMAGE }} DOCKER_VERSION=${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_tag || 'head' }} RELEASE_VERSION=${{ needs.setup.outputs.python_release_version }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -776,7 +776,7 @@ jobs: name: "[Monitoring] Scan 
Datahub Ingestion images for vulnerabilities" runs-on: ubuntu-latest needs: [setup, datahub_ingestion_full_build] - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} steps: - name: Checkout # adding checkout step just to make trivy upload happy uses: acryldata/sane-checkout-action@v3 @@ -814,7 +814,7 @@ jobs: echo 'matrix=["cypress_suite1","cypress_rest"]' >> $GITHUB_OUTPUT elif [ '${{ needs.setup.outputs.ingestion_only }}' == 'true' ]; then echo 'matrix=["no_cypress_suite0","no_cypress_suite1"]' >> $GITHUB_OUTPUT - elif [ '${{ needs.setup.outputs.backend_change }}' == 'true' ]; then + elif [[ '${{ needs.setup.outputs.backend_change }}' == 'true' || '${{ needs.setup.outputs.smoke_test_change }}' == 'true' ]]; then echo 'matrix=["no_cypress_suite0","no_cypress_suite1","cypress_suite1","cypress_rest"]' >> $GITHUB_OUTPUT else echo 'matrix=[]' >> $GITHUB_OUTPUT @@ -862,11 +862,6 @@ jobs: with: python-version: "3.10" cache: "pip" - - name: Install dependencies - run: ./metadata-ingestion/scripts/install_deps.sh - - name: Build datahub cli - run: | - ./gradlew :metadata-ingestion:install - name: Login to DockerHub uses: docker/login-action@v3 if: ${{ needs.setup.outputs.docker-login == 'true' }} @@ -965,7 +960,7 @@ jobs: echo 'datahub-ingestion head-slim images' docker pull '${{ env.DATAHUB_INGESTION_IMAGE }}:head-slim' if [ '${{ needs.datahub_ingestion_slim_build.outputs.tag || 'head-slim' }}' != 'head-slim' ]; then - docker tag '${{ env.DATAHUB_INGESTION_IMAGE }}:head-slim' '${{ env.DATAHUB_INGESTION_IMAGE }}:${{ needs.datahub_ingestion_slim_build.outputs.tag }}' + docker tag '${{ env.DATAHUB_INGESTION_IMAGE }}:head-slim' '${{ env.DATAHUB_INGESTION_IMAGE }}:${{ needs.setup.outputs.unique_tag }}' fi fi - name: Disk Check @@ -994,6 +989,15 @@ jobs: } } }' + - name: Disk Check + 
run: df -h . && docker images + - name: Install dependencies + run: ./metadata-ingestion/scripts/install_deps.sh + - name: Build datahub cli + run: | + ./gradlew :metadata-ingestion:install + - name: Disk Check + run: df -h . && docker images - name: Remove Source Code run: find ./*/* ! -path "./metadata-ingestion*" ! -path "./smoke-test*" ! -path "./gradle*" -delete - name: Disk Check @@ -1014,21 +1018,14 @@ jobs: if: failure() run: | docker ps -a - docker logs datahub-datahub-gms-1 >& gms-${{ matrix.test_strategy }}.log || true - docker logs datahub-datahub-actions-1 >& actions-${{ matrix.test_strategy }}.log || true - docker logs datahub-datahub-mae-consumer-1 >& mae-${{ matrix.test_strategy }}.log || true - docker logs datahub-datahub-mce-consumer-1 >& mce-${{ matrix.test_strategy }}.log || true - docker logs datahub-broker-1 >& broker-${{ matrix.test_strategy }}.log || true - docker logs datahub-mysql-1 >& mysql-${{ matrix.test_strategy }}.log || true - docker logs datahub-elasticsearch-1 >& elasticsearch-${{ matrix.test_strategy }}.log || true - docker logs datahub-datahub-frontend-react-1 >& frontend-${{ matrix.test_strategy }}.log || true - docker logs datahub-upgrade-1 >& upgrade-${{ matrix.test_strategy }}.log || true + TEST_STRATEGY="-${{ matrix.test_strategy }}" + source .github/scripts/docker_logs.sh - name: Upload logs uses: actions/upload-artifact@v3 if: failure() with: name: docker logs - path: "*.log" + path: "docker_logs/*.log" - name: Upload screenshots uses: actions/upload-artifact@v3 if: failure() @@ -1049,7 +1046,7 @@ jobs: runs-on: ubuntu-latest needs: [setup, smoke_test] steps: - - uses: aws-actions/configure-aws-credentials@v1 + - uses: aws-actions/configure-aws-credentials@v4 if: ${{ needs.setup.outputs.publish != 'false' && github.repository_owner == 'datahub-project' && needs.setup.outputs.repository_name == 'datahub' }} with: aws-access-key-id: ${{ secrets.AWS_SQS_ACCESS_KEY_ID }} diff --git a/.github/workflows/gx-plugin.yml 
b/.github/workflows/gx-plugin.yml new file mode 100644 index 00000000000000..84ba2e0559be1b --- /dev/null +++ b/.github/workflows/gx-plugin.yml @@ -0,0 +1,87 @@ +name: GX Plugin +on: + push: + branches: + - master + paths: + - ".github/workflows/gx-plugin.yml" + - "metadata-ingestion-modules/gx-plugin/**" + - "metadata-ingestion/**" + - "metadata-models/**" + pull_request: + branches: + - master + paths: + - ".github/**" + - "metadata-ingestion-modules/gx-plugin/**" + - "metadata-ingestion/**" + - "metadata-models/**" + release: + types: [published] + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + gx-plugin: + runs-on: ubuntu-latest + env: + SPARK_VERSION: 3.0.3 + DATAHUB_TELEMETRY_ENABLED: false + strategy: + matrix: + python-version: ["3.8", "3.10"] + include: + - python-version: "3.8" + extraPythonRequirement: "great-expectations~=0.15.12" + - python-version: "3.10" + extraPythonRequirement: "great-expectations~=0.16.0 numpy~=1.26.0" + - python-version: "3.11" + extraPythonRequirement: "great-expectations~=0.17.0" + fail-fast: false + steps: + - name: Set up JDK 17 + uses: actions/setup-java@v3 + with: + distribution: "zulu" + java-version: 17 + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + - name: Install dependencies + run: ./metadata-ingestion/scripts/install_deps.sh + - name: Install GX package and test (extras ${{ matrix.extraPythonRequirement }}) + run: ./gradlew -Pextra_pip_requirements='${{ matrix.extraPythonRequirement }}' :metadata-ingestion-modules:gx-plugin:lint :metadata-ingestion-modules:gx-plugin:testQuick + - name: pip freeze show list installed + if: always() + run: source metadata-ingestion-modules/gx-plugin/venv/bin/activate && pip freeze + - uses: actions/upload-artifact@v3 + if: ${{ always() && matrix.python-version == '3.11' && matrix.extraPythonRequirement == 
'great-expectations~=0.17.0' }} + with: + name: Test Results (GX Plugin ${{ matrix.python-version}}) + path: | + **/build/reports/tests/test/** + **/build/test-results/test/** + **/junit.*.xml + - name: Upload coverage to Codecov + if: always() + uses: codecov/codecov-action@v3 + with: + token: ${{ secrets.CODECOV_TOKEN }} + directory: . + fail_ci_if_error: false + flags: gx-${{ matrix.python-version }}-${{ matrix.extraPythonRequirement }} + name: pytest-gx + verbose: true + + event-file: + runs-on: ubuntu-latest + steps: + - name: Upload + uses: actions/upload-artifact@v3 + with: + name: Event File + path: ${{ github.event_path }} diff --git a/.github/workflows/lint-actions.yml b/.github/workflows/lint-actions.yml index 4d83adbeba08a1..8a1777522f416b 100644 --- a/.github/workflows/lint-actions.yml +++ b/.github/workflows/lint-actions.yml @@ -14,3 +14,8 @@ jobs: - uses: reviewdog/action-actionlint@v1 with: reporter: github-pr-review + permissions: + contents: read + checks: write + pull-requests: write + issues: write diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml index 51b97552eb150a..a27013c4bf4887 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -46,6 +46,11 @@ jobs: - python-version: "3.10" fail-fast: false steps: + - name: Free up disk space + run: | + sudo apt-get remove -y 'dotnet-*' azure-cli || true # -y: no TTY in CI, so the confirmation prompt would abort the removal + sudo rm -rf /usr/local/lib/android/ || true + sudo docker image prune -a -f || true - name: Set up JDK 17 uses: actions/setup-java@v3 with: diff --git a/.github/workflows/metadata-io.yml b/.github/workflows/metadata-io.yml index 6797c7ad67c0b6..332330b4ed8984 100644 --- a/.github/workflows/metadata-io.yml +++ b/.github/workflows/metadata-io.yml @@ -47,6 +47,11 @@ jobs: timeout-minutes: 60 needs: setup steps: + - name: Free up disk space + run: | + sudo apt-get remove -y 'dotnet-*' azure-cli || true # -y: no TTY in CI, so the confirmation prompt would abort the removal + sudo rm -rf /usr/local/lib/android/ || true + sudo docker
image prune -a -f || true - uses: acryldata/sane-checkout-action@v3 - name: Set up JDK 17 uses: actions/setup-java@v3 diff --git a/.github/workflows/metadata-model.yml b/.github/workflows/metadata-model.yml index 558b7c80f727c1..d62c03057db3f0 100644 --- a/.github/workflows/metadata-model.yml +++ b/.github/workflows/metadata-model.yml @@ -49,7 +49,7 @@ jobs: run: ./gradlew :metadata-ingestion:modelDocGen - name: Configure AWS Credentials if: ${{ needs.setup.outputs.publish == 'true' }} - uses: aws-actions/configure-aws-credentials@v3 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.ACRYL_CI_ARTIFACTS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.ACRYL_CI_ARTIFACTS_ACCESS_KEY }} diff --git a/.github/workflows/publish-datahub-jars.yml b/.github/workflows/publish-datahub-jars.yml index 7137302c73564c..aceee756339ada 100644 --- a/.github/workflows/publish-datahub-jars.yml +++ b/.github/workflows/publish-datahub-jars.yml @@ -45,6 +45,9 @@ jobs: echo "tag=$TAG" >> $GITHUB_OUTPUT publish: runs-on: ubuntu-latest + permissions: + id-token: write + contents: read needs: ["check-secret", "setup"] if: ${{ needs.check-secret.outputs.publish-enabled == 'true' }} steps: diff --git a/.github/workflows/spark-smoke-test.yml b/.github/workflows/spark-smoke-test.yml index 8ffc8420ba9413..d1618c65285773 100644 --- a/.github/workflows/spark-smoke-test.yml +++ b/.github/workflows/spark-smoke-test.yml @@ -44,8 +44,11 @@ jobs: run: ./metadata-ingestion/scripts/install_deps.sh - name: Disk Check run: df -h . && docker images - - name: Remove images - run: docker image prune -a -f || true + - name: Free up disk space + run: | + sudo apt-get remove -y 'dotnet-*' azure-cli || true # -y: no TTY in CI, so the confirmation prompt would abort the removal + sudo rm -rf /usr/local/lib/android/ || true + sudo docker image prune -a -f || true - name: Disk Check run: df -h .
&& docker images - name: Smoke test diff --git a/.github/workflows/test-results.yml b/.github/workflows/test-results.yml index c94a5fc340f473..947fc35f169a04 100644 --- a/.github/workflows/test-results.yml +++ b/.github/workflows/test-results.yml @@ -2,7 +2,7 @@ name: Test Results on: workflow_run: - workflows: ["build & test", "metadata ingestion", "Airflow Plugin", "Dagster Plugin"] + workflows: ["build & test", "metadata ingestion", "Airflow Plugin", "Dagster Plugin", "GX Plugin"] types: - completed @@ -10,6 +10,11 @@ jobs: unit-test-results: name: Unit Test Results runs-on: ubuntu-latest + permissions: + contents: read + actions: read + checks: write + issues: read if: github.event.workflow_run.conclusion != 'skipped' steps: diff --git a/README.md b/README.md index 3ac0668918f708..a0a41b692676ea 100644 --- a/README.md +++ b/README.md @@ -108,6 +108,10 @@ We welcome contributions from the community. Please refer to our [Contributing G Join our [Slack workspace](https://datahubproject.io/slack?utm_source=github&utm_medium=readme&utm_campaign=github_readme) for discussions and important announcements. You can also find out more about our upcoming [town hall meetings](docs/townhalls.md) and view past recordings. +## Security + +See [Security Stance](docs/SECURITY_STANCE.md) for information on DataHub's Security. + ## Adoption Here are the companies that have officially adopted DataHub. Please feel free to add yours to the list if we missed it. @@ -138,6 +142,7 @@ Here are the companies that have officially adopted DataHub. Please feel free to - [Peloton](https://www.onepeloton.com) - [PITS Global Data Recovery Services](https://www.pitsdatarecovery.net/) - [Razer](https://www.razer.com) +- [Rippling](https://www.rippling.com/) - [Showroomprive](https://www.showroomprive.com/) - [SpotHero](https://spothero.com) - [Stash](https://www.stash.com) @@ -153,6 +158,7 @@ Here are the companies that have officially adopted DataHub. 
Please feel free to - [Zynga](https://www.zynga.com) + ## Select Articles & Talks - [DataHub Blog](https://blog.datahubproject.io/) diff --git a/build.gradle b/build.gradle index adb45705c0ebd0..fbced335ddc2e7 100644 --- a/build.gradle +++ b/build.gradle @@ -34,7 +34,7 @@ buildscript { // Releases: https://github.com/linkedin/rest.li/blob/master/CHANGELOG.md ext.pegasusVersion = '29.57.0' ext.mavenVersion = '3.6.3' - ext.springVersion = '6.1.5' + ext.springVersion = '6.1.6' ext.springBootVersion = '3.2.6' ext.springKafkaVersion = '3.1.6' ext.openTelemetryVersion = '1.18.0' @@ -49,7 +49,7 @@ buildscript { ext.log4jVersion = '2.23.1' ext.slf4jVersion = '1.7.36' ext.logbackClassic = '1.4.14' - ext.hadoop3Version = '3.3.5' + ext.hadoop3Version = '3.3.6' ext.kafkaVersion = '5.5.15' ext.hazelcastVersion = '5.3.6' ext.ebeanVersion = '12.16.1' @@ -134,7 +134,7 @@ project.ext.externalDependency = [ 'elasticSearchRest': 'org.opensearch.client:opensearch-rest-high-level-client:' + elasticsearchVersion, 'elasticSearchJava': 'org.opensearch.client:opensearch-java:2.6.0', 'findbugsAnnotations': 'com.google.code.findbugs:annotations:3.0.1', - 'graphqlJava': 'com.graphql-java:graphql-java:21.3', + 'graphqlJava': 'com.graphql-java:graphql-java:21.5', 'graphqlJavaScalars': 'com.graphql-java:graphql-java-extended-scalars:21.0', 'gson': 'com.google.code.gson:gson:2.8.9', 'guice': 'com.google.inject:guice:7.0.0', diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index db9bf304a1085b..b470da3c7c74aa 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -2728,9 +2728,11 @@ private void configureFormResolvers(final RuntimeWiring.Builder builder) { corpUserType, (env) -> { final FormActorAssignment actors = 
env.getSource(); - return actors.getUsers().stream() - .map(CorpUser::getUrn) - .collect(Collectors.toList()); + return actors.getUsers() != null + ? actors.getUsers().stream() + .map(CorpUser::getUrn) + .collect(Collectors.toList()) + : null; })) .dataFetcher( "groups", @@ -2738,9 +2740,11 @@ private void configureFormResolvers(final RuntimeWiring.Builder builder) { corpGroupType, (env) -> { final FormActorAssignment actors = env.getSource(); - return actors.getGroups().stream() - .map(CorpGroup::getUrn) - .collect(Collectors.toList()); + return actors.getGroups() != null + ? actors.getGroups().stream() + .map(CorpGroup::getUrn) + .collect(Collectors.toList()) + : null; })) .dataFetcher("isAssignedToMe", new IsFormAssignedToMeResolver(groupService))); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/FormUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/FormUtils.java index 17718f39c12387..d118c04d19393d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/FormUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/FormUtils.java @@ -202,7 +202,7 @@ public static FormActorAssignment mapFormActorAssignment( if (input.getGroups() != null) { UrnArray groupUrns = new UrnArray(); input.getGroups().forEach(group -> groupUrns.add(UrnUtils.getUrn(group))); - result.setUsers(groupUrns); + result.setGroups(groupUrns); } return result; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java index 29056eb71a7a3a..ddb795189c0e3d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java @@ -171,7 +171,7 @@ public static boolean isOwnerEqual( if (!owner.getOwner().equals(ownerUrn)) { return false; } - if (owner.getTypeUrn() != null) { + if (owner.getTypeUrn() != null && ownershipTypeUrn != null) { return owner.getTypeUrn().equals(ownershipTypeUrn); } if (ownershipTypeUrn == null) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/settings/docPropagation/DocPropagationSettingsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/settings/docPropagation/DocPropagationSettingsResolver.java index 84d3bcd7b376c0..0641d6aca63704 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/settings/docPropagation/DocPropagationSettingsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/settings/docPropagation/DocPropagationSettingsResolver.java @@ -33,7 +33,9 @@ public CompletableFuture get(final DataFetchingEnvironme final GlobalSettingsInfo globalSettings = _settingsService.getGlobalSettings(context.getOperationContext()); final DocPropagationSettings defaultSettings = new DocPropagationSettings(); - defaultSettings.setDocColumnPropagation(true); + // TODO: Enable by default. Currently the automation trusts the settings aspect, which + // does not have this. + defaultSettings.setDocColumnPropagation(false); return globalSettings != null && globalSettings.hasDocPropagation() ? 
mapDocPropagationSettings(globalSettings.getDocPropagation()) : defaultSettings; diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/utils/OwnerUtilsTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/utils/OwnerUtilsTest.java index b4097d9dd045df..d524d8bfb9a6b3 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/utils/OwnerUtilsTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/utils/OwnerUtilsTest.java @@ -59,6 +59,7 @@ public void testIsOwnerEqualOnlyOwnershipTypeUrn() throws URISyntaxException { Urn technicalOwnershipTypeUrn = new Urn(TECHNICAL_OWNER_OWNERSHIP_TYPE_URN); Urn businessOwnershipTypeUrn = new Urn(BUSINESS_OWNER_OWNERSHIP_TYPE_URN); Urn ownerUrn1 = new Urn("urn:li:corpuser:foo"); + Urn ownerUrn2 = new Urn("urn:li:corpuser:bar"); Owner ownerWithTechnicalOwnership = new Owner(); ownerWithTechnicalOwnership.setOwner(ownerUrn1); @@ -72,12 +73,17 @@ public void testIsOwnerEqualOnlyOwnershipTypeUrn() throws URISyntaxException { ownerWithoutOwnershipType.setOwner(ownerUrn1); ownerWithoutOwnershipType.setType(OwnershipType.NONE); + Owner owner2WithoutOwnershipType = new Owner(); + owner2WithoutOwnershipType.setOwner(ownerUrn2); + owner2WithoutOwnershipType.setType(OwnershipType.NONE); + assertTrue( OwnerUtils.isOwnerEqual(ownerWithTechnicalOwnership, ownerUrn1, technicalOwnershipTypeUrn)); assertFalse( OwnerUtils.isOwnerEqual(ownerWithBusinessOwnership, ownerUrn1, technicalOwnershipTypeUrn)); - assertFalse(OwnerUtils.isOwnerEqual(ownerWithTechnicalOwnership, ownerUrn1, null)); + assertTrue(OwnerUtils.isOwnerEqual(ownerWithTechnicalOwnership, ownerUrn1, null)); assertTrue(OwnerUtils.isOwnerEqual(ownerWithoutOwnershipType, ownerUrn1, null)); + assertFalse(OwnerUtils.isOwnerEqual(owner2WithoutOwnershipType, ownerUrn1, null)); } public void testIsOwnerEqualWithBothLegacyAndNewType() throws URISyntaxException { diff --git a/datahub-upgrade/build.gradle 
b/datahub-upgrade/build.gradle index 304bf3a67a5b27..b66371db73386f 100644 --- a/datahub-upgrade/build.gradle +++ b/datahub-upgrade/build.gradle @@ -49,13 +49,16 @@ dependencies { implementation('io.airlift:aircompressor:0.27') { because("CVE-2024-36114") } + implementation('dnsjava:dnsjava:3.6.1') { + because("CVE-2024-25638") + } } // mock internal schema registry implementation externalDependency.kafkaAvroSerde implementation externalDependency.kafkaAvroSerializer - implementation "org.apache.kafka:kafka_2.12:3.7.0" + implementation "org.apache.kafka:kafka_2.12:3.7.1" implementation externalDependency.slf4jApi compileOnly externalDependency.lombok diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDomainDescriptionConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDomainDescriptionConfig.java new file mode 100644 index 00000000000000..3cdab0dc4d4bc6 --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDomainDescriptionConfig.java @@ -0,0 +1,29 @@ +package com.linkedin.datahub.upgrade.config; + +import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; +import com.linkedin.datahub.upgrade.system.domaindescription.ReindexDomainDescription; +import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.entity.EntityService; +import io.datahubproject.metadata.context.OperationContext; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Conditional; +import org.springframework.context.annotation.Configuration; + +@Configuration +@Conditional(SystemUpdateCondition.NonBlockingSystemUpdateCondition.class) +public class ReindexDomainDescriptionConfig { + + @Bean + public NonBlockingSystemUpgrade reindexDomainDescription( + final OperationContext opContext, + final EntityService entityService, + final AspectDao aspectDao, + 
@Value("${systemUpdate.domainDescription.enabled}") final boolean enabled, + @Value("${systemUpdate.domainDescription.batchSize}") final Integer batchSize, + @Value("${systemUpdate.domainDescription.delayMs}") final Integer delayMs, + @Value("${systemUpdate.domainDescription.limit}") final Integer limit) { + return new ReindexDomainDescription( + opContext, entityService, aspectDao, enabled, batchSize, delayMs, limit); + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/RestoreIndicesConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/RestoreIndicesConfig.java index 949b75edaa6ba0..26e40485787e90 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/RestoreIndicesConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/RestoreIndicesConfig.java @@ -8,37 +8,23 @@ import io.ebean.Database; import javax.annotation.Nonnull; import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; -import org.springframework.context.ApplicationContext; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -import org.springframework.context.annotation.DependsOn; @Slf4j @Configuration public class RestoreIndicesConfig { - @Autowired ApplicationContext applicationContext; @Bean(name = "restoreIndices") - @DependsOn({ - "ebeanServer", - "entityService", - "systemMetadataService", - "searchService", - "graphService" - }) @ConditionalOnProperty(name = "entityService.impl", havingValue = "ebean", matchIfMissing = true) @Nonnull - public RestoreIndices createInstance() { - final Database ebeanServer = applicationContext.getBean(Database.class); - final EntityService entityService = applicationContext.getBean(EntityService.class); - final SystemMetadataService systemMetadataService = - 
applicationContext.getBean(SystemMetadataService.class); - final EntitySearchService entitySearchService = - applicationContext.getBean(EntitySearchService.class); - final GraphService graphService = applicationContext.getBean(GraphService.class); - + public RestoreIndices createInstance( + final Database ebeanServer, + final EntityService entityService, + final EntitySearchService entitySearchService, + final GraphService graphService, + final SystemMetadataService systemMetadataService) { return new RestoreIndices( ebeanServer, entityService, systemMetadataService, entitySearchService, graphService); } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/domaindescription/ReindexDomainDescription.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/domaindescription/ReindexDomainDescription.java new file mode 100644 index 00000000000000..85af912e24f68a --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/domaindescription/ReindexDomainDescription.java @@ -0,0 +1,49 @@ +package com.linkedin.datahub.upgrade.system.domaindescription; + +import com.google.common.collect.ImmutableList; +import com.linkedin.datahub.upgrade.UpgradeStep; +import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; +import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.entity.EntityService; +import io.datahubproject.metadata.context.OperationContext; +import java.util.List; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; + +/** + * A job that reindexes all domain aspects as part of reindexing descriptions. This is required to + * fix the analytics for domains. + */ +@Slf4j +public class ReindexDomainDescription implements NonBlockingSystemUpgrade { + + private final List _steps; + + public ReindexDomainDescription( + @Nonnull OperationContext opContext, + EntityService entityService, + AspectDao aspectDao, + boolean enabled, + Integer batchSize, + Integer
batchDelayMs, + Integer limit) { + if (enabled) { + _steps = + ImmutableList.of( + new ReindexDomainDescriptionStep( + opContext, entityService, aspectDao, batchSize, batchDelayMs, limit)); + } else { + _steps = ImmutableList.of(); + } + } + + @Override + public String id() { + return this.getClass().getName(); + } + + @Override + public List steps() { + return _steps; + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/domaindescription/ReindexDomainDescriptionStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/domaindescription/ReindexDomainDescriptionStep.java new file mode 100644 index 00000000000000..1fa8bc92af078f --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/domaindescription/ReindexDomainDescriptionStep.java @@ -0,0 +1,42 @@ +package com.linkedin.datahub.upgrade.system.domaindescription; + +import static com.linkedin.metadata.Constants.*; + +import com.linkedin.datahub.upgrade.system.AbstractMCLStep; +import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.entity.EntityService; +import io.datahubproject.metadata.context.OperationContext; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; +import org.jetbrains.annotations.Nullable; + +@Slf4j +public class ReindexDomainDescriptionStep extends AbstractMCLStep { + + public ReindexDomainDescriptionStep( + OperationContext opContext, + EntityService entityService, + AspectDao aspectDao, + Integer batchSize, + Integer batchDelayMs, + Integer limit) { + super(opContext, entityService, aspectDao, batchSize, batchDelayMs, limit); + } + + @Override + public String id() { + return "domain-description-v1"; + } + + @Nonnull + @Override + protected String getAspectName() { + return DOMAIN_PROPERTIES_ASPECT_NAME; + } + + @Nullable + @Override + protected String getUrnLike() { + return "urn:li:" + DOMAIN_ENTITY_NAME + ":%"; + } +} diff --git a/datahub-web-react/.eslintrc.js 
b/datahub-web-react/.eslintrc.js index 5627283af1af1c..3fdf7b6a3042ca 100644 --- a/datahub-web-react/.eslintrc.js +++ b/datahub-web-react/.eslintrc.js @@ -48,7 +48,7 @@ module.exports = { ], 'vitest/prefer-to-be': 'off', '@typescript-eslint/no-use-before-define': ['error', { functions: false, classes: false }], - 'react-refresh/only-export-components': ['warn', { 'allowConstantExport': true }], + 'react-refresh/only-export-components': ['warn', { allowConstantExport: true }], }, settings: { react: { diff --git a/datahub-web-react/README.md b/datahub-web-react/README.md index 560f5315b2c71f..86bbb349b027c4 100644 --- a/datahub-web-react/README.md +++ b/datahub-web-react/README.md @@ -1,44 +1,47 @@ --- -title: "datahub-web-react" +title: 'datahub-web-react' --- # DataHub React App ## About -This module contains a React application that serves as the DataHub UI. -Feel free to take a look around, deploy, and contribute. +This module contains a React application that serves as the DataHub UI. +Feel free to take a look around, deploy, and contribute. ## Functional Goals + The initial milestone for the app was to achieve functional parity with the previous Ember app. This meant supporting -- Dataset Profiles, Search, Browse Experience -- User Profiles, Search -- LDAP Authentication Flow +- Dataset Profiles, Search, Browse Experience +- User Profiles, Search +- LDAP Authentication Flow -This has since been achieved. The new set of functional goals are reflected in the latest version of the [DataHub Roadmap](../docs/roadmap.md). +This has since been achieved. The new set of functional goals are reflected in the latest version of the [DataHub Roadmap](../docs/roadmap.md). ## Design Goals + In building out the client experience, we intend to leverage learnings from the previous Ember-based app and incorporate feedback gathered from organizations operating DataHub. Two themes have emerged to serve as guideposts: -1. 
**Configurability**: The client experience should be configurable, such that deploying organizations can tailor certain - aspects to their needs. This includes theme / styling configurability, showing and hiding specific functionality, - customizing copy & logos, etc. - -2. **Extensibility**: Extending the *functionality* of DataHub should be as simple as possible. Making changes like - extending an existing entity & adding a new entity should require minimal effort and should be well covered in detailed - documentation. +1. **Configurability**: The client experience should be configurable, such that deploying organizations can tailor certain + aspects to their needs. This includes theme / styling configurability, showing and hiding specific functionality, + customizing copy & logos, etc. +2. **Extensibility**: Extending the _functionality_ of DataHub should be as simple as possible. Making changes like + extending an existing entity & adding a new entity should require minimal effort and should be well covered in detailed + documentation. ## Starting the Application ### Quick Start Navigate to the `docker` directory and run the following to spin up the react app: + ``` ./quickstart.sh ``` + at `http://localhost:9002`. If you want to make changes to the UI see them live without having to rebuild the `datahub-frontend-react` docker image, you @@ -54,8 +57,9 @@ Optionally you could also start the app with the mock server without running the ### Testing your customizations There is two options to test your customizations: -* **Option 1**: Initialize the docker containers with the `quickstart.sh` script (or if any custom docker-compose file) and then run `yarn start` in this directory. This will start a forwarding server at `localhost:3000` that will use the `datahub-frontend` server at `http://localhost:9002` to fetch real data. 
-* **Option 2**: Change the environment variable `REACT_APP_PROXY_TARGET` in the `.env` file to point to your `datahub-frontend` server (ex: https://my_datahub_host.com) and then run `yarn start` in this directory. This will start a forwarding server at `localhost:3000` that will use the `datahub-frontend` server at some domain to fetch real data. + +- **Option 1**: Initialize the docker containers with the `quickstart.sh` script (or if any custom docker-compose file) and then run `yarn start` in this directory. This will start a forwarding server at `localhost:3000` that will use the `datahub-frontend` server at `http://localhost:9002` to fetch real data. +- **Option 2**: Change the environment variable `REACT_APP_PROXY_TARGET` in the `.env` file to point to your `datahub-frontend` server (ex: https://my_datahub_host.com) and then run `yarn start` in this directory. This will start a forwarding server at `localhost:3000` that will use the `datahub-frontend` server at some domain to fetch real data. The option 2 is useful if you want to test your React customizations without having to run the hole DataHub stack locally. However, if you changed other components of the DataHub stack, you will need to run the hole stack locally (building the docker images) and use the option 1. @@ -68,10 +72,10 @@ In order to start a server and run frontend unit tests using react-testing-frame There are also more automated tests using Cypress in the `smoke-test` folder of the repository root. #### Troubleshooting + `Error: error:0308010C:digital envelope routines::unsupported`: This error message shows up when using Node 17, due to an OpenSSL update related to md5. The best workaround is to revert to the Active LTS version of Node, 16.13.0 with the command `nvm install 16.13.0` and if necessary reinstall yarn `npm install --global yarn`. 
- ### Theming #### Customizing your App without rebuilding assets @@ -108,74 +112,74 @@ you to terminate and re-run `yarn start` to see updated styles. The `src` dir of the app is broken down into the following modules -**conf** - Stores global configuration flags that can be referenced across the app. For example, the number of +**conf** - Stores global configuration flags that can be referenced across the app. For example, the number of search results shown per page, or the placeholder text in the search bar box. It serves as a location where levels -for functional configurability should reside. +for functional configurability should reside. **app** - Contains all important components of the app. It has a few sub-modules: -- `auth`: Components used to render the user authentication experience. -- `browse`: Shared components used to render the 'browse-by-path' experience. The experience is akin to navigating a filesystem hierarchy. -- `preview`: Shared components used to render Entity 'preview' views. These can appear in search results, browse results, - and within entity profile pages. -- `search`: Shared components used to render the full-text search experience. -- `shared`: Misc. shared components -- `entity`: Contains Entity definitions, where entity-specific functionality resides. - Configuration is provided by implementing the 'Entity' interface. (See DatasetEntity.tsx for example) - There are 2 visual components each entity should supply: - - `profiles`: display relevant details about an individual entity. This serves as the entity's 'profile'. - - `previews`: provide a 'preview', or a smaller details card, containing the most important information about an entity instance. - - When rendering a preview, the entity's data and the type of preview (SEARCH, BROWSE, PREVIEW) are provided. This +- `auth`: Components used to render the user authentication experience. +- `browse`: Shared components used to render the 'browse-by-path' experience. 
The experience is akin to navigating a filesystem hierarchy. +- `preview`: Shared components used to render Entity 'preview' views. These can appear in search results, browse results, + and within entity profile pages. +- `search`: Shared components used to render the full-text search experience. +- `shared`: Misc. shared components +- `entity`: Contains Entity definitions, where entity-specific functionality resides. + Configuration is provided by implementing the 'Entity' interface. (See DatasetEntity.tsx for example) + There are 2 visual components each entity should supply: + + - `profiles`: display relevant details about an individual entity. This serves as the entity's 'profile'. + - `previews`: provide a 'preview', or a smaller details card, containing the most important information about an entity instance. + + When rendering a preview, the entity's data and the type of preview (SEARCH, BROWSE, PREVIEW) are provided. This allows you to optionally customize the way an entities preview is rendered in different views. - - - `entity registry`: There's another very important piece of code living within this module: the **EntityRegistry**. This is a layer + + - `entity registry`: There's another very important piece of code living within this module: the **EntityRegistry**. This is a layer of abstraction over the intimate details of rendering a particular entity. It is used to render a view associated with a particular entity type (user, dataset, etc.). - - +

-**graphql** - The React App talks to the `dathub-frontend` server using GraphQL. This module is where the *queries* issued -against the server are defined. Once defined, running `yarn run generate` will code-gen TypeScript objects to make invoking +**graphql** - The React App talks to the `datahub-frontend` server using GraphQL. This module is where the _queries_ issued +against the server are defined. Once defined, running `yarn run generate` will code-gen TypeScript objects to make invoking these queries extremely easy. An example can be found at the top of `SearchPage.tsx.` -**images** - Images to be displayed within the app. This is where one would place a custom logo image. +**images** - Images to be displayed within the app. This is where one would place a custom logo image. ## Adding an Entity The following outlines a series of steps required to introduce a new entity into the React app: -1. Declare the GraphQL Queries required to display the new entity - - If search functionality should be supported, extend the "search" query within `search.graphql` to fetch the new +1. Declare the GraphQL Queries required to display the new entity + + - If search functionality should be supported, extend the "search" query within `search.graphql` to fetch the new + entity data. + - If browse functionality should be supported, extend the "browse" query within `browse.graphql` to fetch the new entity data. - - If browse functionality should be supported, extend the "browse" query within `browse.graphql` to fetch the new - entity data. - - If display a 'profile' should be supported (most often), introduce a new `.graphql` file that contains a - `get` query to fetch the entity by primary key (urn). - - Note that your new entity *must* implement the `Entity` GraphQL type interface, and thus must have a corresponding - `EntityType`. - - -2.
Implement the `Entity` interface + - If displaying a 'profile' should be supported (most often), introduce a new `.graphql` file that contains a + `get` query to fetch the entity by primary key (urn). + + Note that your new entity _must_ implement the `Entity` GraphQL type interface, and thus must have a corresponding + `EntityType`. + +2. Implement the `Entity` interface + - Create a new folder under `src/components/entity` corresponding to your entity - Create a class that implements the `Entity` interface (example: `DatasetEntity.tsx`) - - Provide an implementation each method defined on the interface. - - This class specifies whether your new entity should be searchable & browsable, defines the names used to - identify your entity when instances are rendered in collection / when entity appears - in the URL path, and provides the ability to render your entity given data returned by the GQL API. - + - Provide an implementation of each method defined on the interface. + - This class specifies whether your new entity should be searchable & browsable, defines the names used to + identify your entity when instances are rendered in collection / when entity appears + in the URL path, and provides the ability to render your entity given data returned by the GQL API. 3. Register the new entity in the `EntityRegistry` - - Update `App.tsx` to register an instance of your new entity. Now your entity will be accessible via the registry + - Update `App.tsx` to register an instance of your new entity. Now your entity will be accessible via the registry and appear in the UI. To manually retrieve the info about your entity or others, simply use an instance - of the `EntityRegistry`, which is provided via `ReactContext` to *all* components in the hierarchy. + of the `EntityRegistry`, which is provided via `ReactContext` to _all_ components in the hierarchy. For example - ``` - entityRegistry.getCollectionName(EntityType.YOUR_NEW_ENTITY) - ``` - -That's it!
For any questions, do not hesitate to reach out on the DataHub Slack community in #datahub-react. + ``` + entityRegistry.getCollectionName(EntityType.YOUR_NEW_ENTITY) + ``` + +That's it! For any questions, do not hesitate to reach out on the DataHub Slack community in #datahub-react. diff --git a/datahub-web-react/src/app/analytics/event.ts b/datahub-web-react/src/app/analytics/event.ts index d63b731c720426..c3a57830b8c504 100644 --- a/datahub-web-react/src/app/analytics/event.ts +++ b/datahub-web-react/src/app/analytics/event.ts @@ -191,6 +191,7 @@ export interface SearchResultClickEvent extends BaseEvent { entityTypeFilter?: EntityType; index: number; total: number; + pageNumber: number; } export interface SearchFiltersClearAllEvent extends BaseEvent { diff --git a/datahub-web-react/src/app/entity/dataset/profile/schema/components/SchemaDescriptionField.tsx b/datahub-web-react/src/app/entity/dataset/profile/schema/components/SchemaDescriptionField.tsx index ce8d03fbdc9602..e7d986028d4a66 100644 --- a/datahub-web-react/src/app/entity/dataset/profile/schema/components/SchemaDescriptionField.tsx +++ b/datahub-web-react/src/app/entity/dataset/profile/schema/components/SchemaDescriptionField.tsx @@ -5,6 +5,8 @@ import styled from 'styled-components'; import { FetchResult } from '@apollo/client'; import { UpdateDatasetMutation } from '../../../../../../graphql/dataset.generated'; +import { StringMapEntry } from '../../../../../../types.generated'; +import PropagationDetails from '../../../../shared/propagation/PropagationDetails'; import UpdateDescriptionModal from '../../../../shared/components/legacy/DescriptionModal'; import StripMarkdownText, { removeMarkdown } from '../../../../shared/components/styled/StripMarkdownText'; import SchemaEditableContext from '../../../../../shared/SchemaEditableContext'; @@ -28,6 +30,11 @@ const ExpandedActions = styled.div` height: 10px; `; +const DescriptionWrapper = styled.span` + display: inline-flex; + align-items: center; +`; + 
const DescriptionContainer = styled.div` position: relative; display: flex; @@ -105,6 +112,8 @@ type Props = { isEdited?: boolean; isReadOnly?: boolean; businessAttributeDescription?: string; + isPropagated?: boolean; + sourceDetail?: StringMapEntry[] | null; }; const ABBREVIATED_LIMIT = 80; @@ -120,6 +129,8 @@ export default function DescriptionField({ original, isReadOnly, businessAttributeDescription, + isPropagated, + sourceDetail, }: Props) { const [showAddModal, setShowAddModal] = useState(false); const overLimit = removeMarkdown(description).length > 80; @@ -163,7 +174,7 @@ export default function DescriptionField({ return ( - {expanded || !overLimit ? ( + {expanded ? ( <> {!!description && } {!!description && (EditButton || overLimit) && ( @@ -184,25 +195,29 @@ export default function DescriptionField({ ) : ( <> - - { - e.stopPropagation(); - handleExpanded(true); - }} - > - Read More - - - } - suffix={EditButton} - shouldWrap - > - {description} - + + {isPropagated && } +   + + { + e.stopPropagation(); + handleExpanded(true); + }} + > + Read More + + + } + suffix={EditButton} + shouldWrap + > + {description} + + )} {isEdited && (edited)} diff --git a/datahub-web-react/src/app/entity/shared/components/legacy/DescriptionModal.tsx b/datahub-web-react/src/app/entity/shared/components/legacy/DescriptionModal.tsx index 0e899bc391e0a7..2d65a305b4cc8b 100644 --- a/datahub-web-react/src/app/entity/shared/components/legacy/DescriptionModal.tsx +++ b/datahub-web-react/src/app/entity/shared/components/legacy/DescriptionModal.tsx @@ -19,16 +19,29 @@ const StyledViewer = styled(Editor)` } `; +const OriginalDocumentation = styled(Form.Item)` + margin-bottom: 0; +`; + type Props = { title: string; description?: string | undefined; original?: string | undefined; + propagatedDescription?: string | undefined; onClose: () => void; onSubmit: (description: string) => void; isAddDesc?: boolean; }; -export default function UpdateDescriptionModal({ title, description, original, 
onClose, onSubmit, isAddDesc }: Props) { +export default function UpdateDescriptionModal({ + title, + description, + original, + propagatedDescription, + onClose, + onSubmit, + isAddDesc, +}: Props) { const [updatedDesc, setDesc] = useState(description || original || ''); const handleEditorKeyDown = (event: React.KeyboardEvent) => { @@ -72,9 +85,14 @@ export default function UpdateDescriptionModal({ title, description, original, o /> {!isAddDesc && description && original && ( - Original:}> + Original:}> - + + )} + {!isAddDesc && description && propagatedDescription && ( + Propagated:}> + + )} diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/__tests__/EntityHeader.test.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/__tests__/EntityHeader.test.tsx index db347d4f1cc54c..ec6a91df9019ab 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/__tests__/EntityHeader.test.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/__tests__/EntityHeader.test.tsx @@ -3,13 +3,14 @@ import { EntityType } from '../../../../../../types.generated'; import { getCanEditName } from '../header/EntityHeader'; describe('getCanEditName', () => { - const entityDataWithManagePrivileges = { privileges: { canManageEntity: true } }; - const entityDataWithoutManagePrivileges = { privileges: { canManageEntity: false } }; + const entityDataWithManagePrivileges = { privileges: { canManageEntity: true, canEditProperties: true } }; + const entityDataWithoutManagePrivileges = { privileges: { canManageEntity: false, canEditProperties: false } }; it('should return true for Terms if manageGlossaries privilege is true', () => { const canEditName = getCanEditName( EntityType.GlossaryTerm, entityDataWithoutManagePrivileges, + true, platformPrivileges, ); @@ -21,6 +22,7 @@ describe('getCanEditName', () => { const canEditName = getCanEditName( EntityType.GlossaryTerm, entityDataWithoutManagePrivileges, + true, privilegesWithoutGlossaries, 
); @@ -32,6 +34,7 @@ describe('getCanEditName', () => { const canEditName = getCanEditName( EntityType.GlossaryTerm, entityDataWithManagePrivileges, + true, privilegesWithoutGlossaries, ); @@ -42,6 +45,7 @@ describe('getCanEditName', () => { const canEditName = getCanEditName( EntityType.GlossaryNode, entityDataWithoutManagePrivileges, + true, platformPrivileges, ); @@ -53,6 +57,7 @@ describe('getCanEditName', () => { const canEditName = getCanEditName( EntityType.GlossaryNode, entityDataWithoutManagePrivileges, + true, privilegesWithoutGlossaries, ); @@ -64,6 +69,7 @@ describe('getCanEditName', () => { const canEditName = getCanEditName( EntityType.GlossaryNode, entityDataWithManagePrivileges, + true, privilegesWithoutGlossaries, ); @@ -71,7 +77,12 @@ describe('getCanEditName', () => { }); it('should return true for Domains if manageDomains privilege is true', () => { - const canEditName = getCanEditName(EntityType.Domain, entityDataWithoutManagePrivileges, platformPrivileges); + const canEditName = getCanEditName( + EntityType.Domain, + entityDataWithoutManagePrivileges, + true, + platformPrivileges, + ); expect(canEditName).toBe(true); }); @@ -81,6 +92,7 @@ describe('getCanEditName', () => { const canEditName = getCanEditName( EntityType.Domain, entityDataWithoutManagePrivileges, + true, privilegesWithoutDomains, ); @@ -88,7 +100,30 @@ describe('getCanEditName', () => { }); it('should return false for an unsupported entity', () => { - const canEditName = getCanEditName(EntityType.Chart, entityDataWithManagePrivileges, platformPrivileges); + const canEditName = getCanEditName(EntityType.Chart, entityDataWithManagePrivileges, true, platformPrivileges); + + expect(canEditName).toBe(false); + }); + + it('should return true for a dataset if canEditProperties is true', () => { + const canEditName = getCanEditName(EntityType.Chart, entityDataWithManagePrivileges, true, platformPrivileges); + + expect(canEditName).toBe(false); + }); + + it('should return false for a 
dataset if canEditProperties is false', () => { + const canEditName = getCanEditName( + EntityType.Chart, + entityDataWithoutManagePrivileges, + true, + platformPrivileges, + ); + + expect(canEditName).toBe(false); + }); + + it('should return false for a dataset if isEditableDatasetNameEnabled is false', () => { + const canEditName = getCanEditName(EntityType.Chart, entityDataWithManagePrivileges, false, platformPrivileges); expect(canEditName).toBe(false); }); diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityHeader.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityHeader.tsx index 11335d0378760c..12fa9131f33c73 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityHeader.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityHeader.tsx @@ -60,6 +60,7 @@ const TopButtonsWrapper = styled.div` export function getCanEditName( entityType: EntityType, entityData: GenericEntityProperties | null, + isEditableDatasetNameEnabled: boolean, privileges?: PlatformPrivileges, ) { switch (entityType) { @@ -73,7 +74,7 @@ export function getCanEditName( case EntityType.BusinessAttribute: return privileges?.manageBusinessAttributes; case EntityType.Dataset: - return entityData?.privileges?.canEditProperties; + return isEditableDatasetNameEnabled && entityData?.privileges?.canEditProperties; default: return false; } @@ -99,9 +100,13 @@ export const EntityHeader = ({ headerDropdownItems, headerActionItems, isNameEdi const isEditableDatasetNameEnabled = useIsEditableDatasetNameEnabled(); const canEditName = - isEditableDatasetNameEnabled && isNameEditable && - getCanEditName(entityType, entityData, me?.platformPrivileges as PlatformPrivileges); + getCanEditName( + entityType, + entityData, + isEditableDatasetNameEnabled, + me?.platformPrivileges as PlatformPrivileges, + ); const entityRegistry = useEntityRegistry(); return ( diff --git 
a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/EntitySidebar.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/EntitySidebar.tsx index a8d1dceb71ec92..b5e3b221c736d0 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/EntitySidebar.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/EntitySidebar.tsx @@ -46,7 +46,7 @@ export const EntitySidebar = ({ sidebarSections, topSection }: Props) => { return ( <> {topSection && } - {entityData?.lastIngested && ( + {!!entityData?.lastIngested && ( diff --git a/datahub-web-react/src/app/entity/shared/propagation/PropagationDetails.tsx b/datahub-web-react/src/app/entity/shared/propagation/PropagationDetails.tsx new file mode 100644 index 00000000000000..646f47134938c4 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/propagation/PropagationDetails.tsx @@ -0,0 +1,109 @@ +import React from 'react'; +import styled from 'styled-components'; +import { Popover } from 'antd'; +import { StringMapEntry } from '../../../../types.generated'; +import PropagationEntityLink from './PropagationEntityLink'; +import { usePropagationDetails } from './utils'; +import { PropagateThunderbolt, PropagateThunderboltFilled } from './PropagationIcon'; + +const PopoverWrapper = styled.div` + display: flex; + flex-direction: column; +`; + +const PopoverTitle = styled.div` + font-weight: bold; + font-size: 14px; + padding: 6px 0px; + color: #eeecfa; +`; + +const PopoverDescription = styled.div` + max-width: 340px; + font-size: 14px; + color: #eeecfa; + display: inline; + padding: 0px 0px 8px 0px; +`; + +const PopoverAttributes = styled.div` + display: flex; +`; + +const PopoverAttribute = styled.div` + margin-right: 12px; + margin-bottom: 4px; +`; + +const PopoverAttributeTitle = styled.div` + font-size: 14px; + color: #eeecfa; + font-weight: bold; + margin: 8px 0px; + overflow: hidden; + text-overflow: ellipsis; +`; + +const PopoverDocumentation = 
styled.a` + margin-top: 12px; +`; + +interface Props { + sourceDetail?: StringMapEntry[] | null; +} + +export default function PropagationDetails({ sourceDetail }: Props) { + const { + isPropagated, + origin: { entity: originEntity }, + via: { entity: viaEntity }, + } = usePropagationDetails(sourceDetail); + + if (!sourceDetail || !isPropagated) return null; + + const popoverContent = + originEntity || viaEntity ? ( + + + This description was automatically propagated from an upstream column.{' '} + + Learn more + + + + {originEntity && originEntity.urn !== viaEntity?.urn && ( + + Origin + + + )} + {viaEntity && ( + + Via + + + )} + + + ) : undefined; + + return ( + + + Propagated Description + + } + content={popoverContent} + > + + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/propagation/PropagationEntityLink.tsx b/datahub-web-react/src/app/entity/shared/propagation/PropagationEntityLink.tsx new file mode 100644 index 00000000000000..8c1285dd5808b1 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/propagation/PropagationEntityLink.tsx @@ -0,0 +1,56 @@ +import React from 'react'; +import styled from 'styled-components'; +import { Link } from 'react-router-dom'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { Entity, EntityType, SchemaFieldEntity } from '../../../../types.generated'; +import { GenericEntityProperties } from '../types'; + +const PreviewImage = styled.img<{ size: number }>` + height: ${(props) => props.size}px; + width: ${(props) => props.size}px; + min-width: ${(props) => props.size}px; + object-fit: contain; + background-color: transparent; + margin: 0px 4px 0px 0px; +`; + +const StyledLink = styled(Link)` + margin-right: 4px; + display: flex; + align-items: center; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +`; + +interface Props { + entity: Entity; +} + +export default function PropagationEntityLink({ entity }: Props) { + const entityRegistry = useEntityRegistry(); + + 
const isSchemaField = entity.type === EntityType.SchemaField; + const baseEntity = isSchemaField ? (entity as SchemaFieldEntity).parent : entity; + + const logoUrl = (baseEntity as GenericEntityProperties)?.platform?.properties?.logoUrl || ''; + let entityUrl = entityRegistry.getEntityUrl(baseEntity.type, baseEntity.urn); + let entityDisplayName = entityRegistry.getDisplayName(baseEntity.type, baseEntity); + + if (isSchemaField) { + entityUrl = `${entityUrl}/${encodeURIComponent('Columns')}?schemaFilter=${encodeURIComponent( + (entity as SchemaFieldEntity).fieldPath, + )}`; + const schemaFieldName = entityRegistry.getDisplayName(entity.type, entity); + entityDisplayName = `${entityDisplayName}.${schemaFieldName}`; + } + + return ( + <> + + + {entityDisplayName} + + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/propagation/PropagationIcon.tsx b/datahub-web-react/src/app/entity/shared/propagation/PropagationIcon.tsx new file mode 100644 index 00000000000000..01b4570c4ca0df --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/propagation/PropagationIcon.tsx @@ -0,0 +1,22 @@ +import styled from 'styled-components'; +import { ThunderboltFilled } from '@ant-design/icons'; +import { REDESIGN_COLORS } from '../constants'; + +export const PropagateThunderbolt = styled(ThunderboltFilled)` + && { + color: #a7c7fa; + } + font-size: 16px; + &:hover { + color: ${REDESIGN_COLORS.BLUE}; + } + margin-right: 4px; +`; + +export const PropagateThunderboltFilled = styled(ThunderboltFilled)` + && { + color: ${REDESIGN_COLORS.BLUE}; + } + font-size: 16px; + margin-right: 4px; +`; diff --git a/datahub-web-react/src/app/entity/shared/propagation/utils.ts b/datahub-web-react/src/app/entity/shared/propagation/utils.ts new file mode 100644 index 00000000000000..d8b4d4d931f4ee --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/propagation/utils.ts @@ -0,0 +1,24 @@ +import { StringMapEntry } from '../../../../types.generated'; +import { useGetEntities } from 
'../useGetEntities'; + +export function usePropagationDetails(sourceDetail?: StringMapEntry[] | null) { + const isPropagated = !!sourceDetail?.find((mapEntry) => mapEntry.key === 'propagated' && mapEntry.value === 'true'); + const originEntityUrn = sourceDetail?.find((mapEntry) => mapEntry.key === 'origin')?.value || ''; + const viaEntityUrn = sourceDetail?.find((mapEntry) => mapEntry.key === 'via')?.value || ''; + + const entities = useGetEntities([originEntityUrn, viaEntityUrn]); + const originEntity = entities.find((e) => e.urn === originEntityUrn); + const viaEntity = entities.find((e) => e.urn === viaEntityUrn); + + return { + isPropagated, + origin: { + urn: originEntityUrn, + entity: originEntity, + }, + via: { + urn: viaEntityUrn, + entity: viaEntity, + }, + }; +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldDescription.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldDescription.tsx index be95cba3ab4f07..e64a1436b0b1c5 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldDescription.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldDescription.tsx @@ -6,6 +6,8 @@ import styled from 'styled-components'; import { SectionHeader, StyledDivider } from './components'; import UpdateDescriptionModal from '../../../../../components/legacy/DescriptionModal'; import { EditableSchemaFieldInfo, SchemaField, SubResourceType } from '../../../../../../../../types.generated'; +import { getFieldDescriptionDetails } from '../../utils/getFieldDescriptionDetails'; +import PropagationDetails from '../../../../../propagation/PropagationDetails'; import DescriptionSection from '../../../../../containers/profile/sidebar/AboutSection/DescriptionSection'; import { useEntityData, useMutationUrn, useRefetch } from '../../../../../EntityContext'; import { 
useSchemaRefetch } from '../../SchemaContext'; @@ -13,11 +15,6 @@ import { useUpdateDescriptionMutation } from '../../../../../../../../graphql/mu import analytics, { EntityActionType, EventType } from '../../../../../../../analytics'; import SchemaEditableContext from '../../../../../../../shared/SchemaEditableContext'; -const DescriptionWrapper = styled.div` - display: flex; - justify-content: space-between; -`; - const EditIcon = styled(Button)` border: none; box-shadow: none; @@ -25,6 +22,13 @@ const EditIcon = styled(Button)` width: 20px; `; +const DescriptionWrapper = styled.div` + display: flex; + gap: 4px; + align-items: center; + justify-content: space-between; +`; + interface Props { expandedField: SchemaField; editableFieldInfo?: EditableSchemaFieldInfo; @@ -76,7 +80,13 @@ export default function FieldDescription({ expandedField, editableFieldInfo }: P }, }); - const displayedDescription = editableFieldInfo?.description || expandedField.description; + const { schemaFieldEntity, description } = expandedField; + const { displayedDescription, isPropagated, sourceDetail, propagatedDescription } = getFieldDescriptionDetails({ + schemaFieldEntity, + editableFieldInfo, + defaultDescription: description, + }); + const baDescription = expandedField?.schemaFieldEntity?.businessAttributes?.businessAttribute?.businessAttribute?.properties ?.description; @@ -87,12 +97,17 @@ export default function FieldDescription({ expandedField, editableFieldInfo }: P
Description - + + {isPropagated && } + {!!displayedDescription && ( + + )} +
{isSchemaEditable && ( )} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/getFieldDescriptionDetails.ts b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/getFieldDescriptionDetails.ts new file mode 100644 index 00000000000000..6434baddb77a66 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/getFieldDescriptionDetails.ts @@ -0,0 +1,25 @@ +import { EditableSchemaFieldInfo, SchemaFieldEntity } from '../../../../../../../types.generated'; + +interface Props { + schemaFieldEntity?: SchemaFieldEntity | null; + editableFieldInfo?: EditableSchemaFieldInfo; + defaultDescription?: string | null; +} + +export function getFieldDescriptionDetails({ schemaFieldEntity, editableFieldInfo, defaultDescription }: Props) { + const documentation = schemaFieldEntity?.documentation?.documentations?.[0]; + const isUsingDocumentationAspect = !editableFieldInfo?.description && !!documentation; + const isPropagated = + isUsingDocumentationAspect && + !!documentation?.attribution?.sourceDetail?.find( + (mapEntry) => mapEntry.key === 'propagated' && mapEntry.value === 'true', + ); + + const displayedDescription = + editableFieldInfo?.description || documentation?.documentation || defaultDescription || ''; + + const sourceDetail = documentation?.attribution?.sourceDetail; + const propagatedDescription = documentation?.documentation; + + return { displayedDescription, isPropagated, sourceDetail, propagatedDescription }; +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useDescriptionRenderer.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useDescriptionRenderer.tsx index 73e6d2ca6e9b3e..bb70c2cb493037 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useDescriptionRenderer.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useDescriptionRenderer.tsx @@ -6,6 +6,7 @@ import { useUpdateDescriptionMutation } 
from '../../../../../../../graphql/mutat import { useMutationUrn, useRefetch } from '../../../../EntityContext'; import { useSchemaRefetch } from '../SchemaContext'; import { pathMatchesNewPath } from '../../../../../dataset/profile/schema/utils/utils'; +import { getFieldDescriptionDetails } from './getFieldDescriptionDetails'; export default function useDescriptionRenderer(editableSchemaMetadata: EditableSchemaMetadata | null | undefined) { const urn = useMutationUrn(); @@ -21,10 +22,16 @@ export default function useDescriptionRenderer(editableSchemaMetadata: EditableS }; return (description: string, record: SchemaField, index: number): JSX.Element => { - const relevantEditableFieldInfo = editableSchemaMetadata?.editableSchemaFieldInfo.find( - (candidateEditableFieldInfo) => pathMatchesNewPath(candidateEditableFieldInfo.fieldPath, record.fieldPath), + const editableFieldInfo = editableSchemaMetadata?.editableSchemaFieldInfo.find((candidateEditableFieldInfo) => + pathMatchesNewPath(candidateEditableFieldInfo.fieldPath, record.fieldPath), ); - const displayedDescription = relevantEditableFieldInfo?.description || description; + const { schemaFieldEntity } = record; + const { displayedDescription, isPropagated, sourceDetail } = getFieldDescriptionDetails({ + schemaFieldEntity, + editableFieldInfo, + defaultDescription: description, + }); + const sanitizedDescription = DOMPurify.sanitize(displayedDescription); const original = record.description ? 
DOMPurify.sanitize(record.description) : undefined; const businessAttributeDescription = @@ -43,7 +50,7 @@ export default function useDescriptionRenderer(editableSchemaMetadata: EditableS baExpanded={!!expandedBARows[index]} description={sanitizedDescription} original={original} - isEdited={!!relevantEditableFieldInfo?.description} + isEdited={!!editableFieldInfo?.description} onUpdate={(updatedDescription) => updateDescription({ variables: { @@ -56,6 +63,8 @@ export default function useDescriptionRenderer(editableSchemaMetadata: EditableS }, }).then(refresh) } + isPropagated={isPropagated} + sourceDetail={sourceDetail} isReadOnly /> ); diff --git a/datahub-web-react/src/app/entity/shared/useGetEntities.ts b/datahub-web-react/src/app/entity/shared/useGetEntities.ts new file mode 100644 index 00000000000000..9391bc17d7a8a2 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/useGetEntities.ts @@ -0,0 +1,18 @@ +import { useEffect, useState } from 'react'; +import { useGetEntitiesQuery } from '../../../graphql/entity.generated'; +import { Entity } from '../../../types.generated'; + +export function useGetEntities(urns: string[]): Entity[] { + const [verifiedUrns, setVerifiedUrns] = useState([]); + + useEffect(() => { + urns.forEach((urn) => { + if (urn.startsWith('urn:li:') && !verifiedUrns.includes(urn)) { + setVerifiedUrns((prevUrns) => [...prevUrns, urn]); + } + }); + }, [urns, verifiedUrns]); + + const { data } = useGetEntitiesQuery({ variables: { urns: verifiedUrns }, skip: !verifiedUrns.length }); + return (data?.entities || []) as Entity[]; +} diff --git a/datahub-web-react/src/app/lineage/LineageExplorer.tsx b/datahub-web-react/src/app/lineage/LineageExplorer.tsx index 26ffaa26a6ca22..ce0c4bb8f122d4 100644 --- a/datahub-web-react/src/app/lineage/LineageExplorer.tsx +++ b/datahub-web-react/src/app/lineage/LineageExplorer.tsx @@ -221,7 +221,9 @@ export default function LineageExplorer({ urn, type }: Props) { Close {selectedEntity.type !== 
EntityType.Restricted && ( - )} diff --git a/datahub-web-react/src/app/search/SearchResultList.tsx b/datahub-web-react/src/app/search/SearchResultList.tsx index d85c3674cbd43d..bc0efcfa3f47e8 100644 --- a/datahub-web-react/src/app/search/SearchResultList.tsx +++ b/datahub-web-react/src/app/search/SearchResultList.tsx @@ -62,6 +62,7 @@ type Props = { selectedEntities: EntityAndType[]; setSelectedEntities: (entities: EntityAndType[]) => any; suggestions: SearchSuggestion[]; + pageNumber: number; }; export const SearchResultList = ({ @@ -73,6 +74,7 @@ export const SearchResultList = ({ selectedEntities, setSelectedEntities, suggestions, + pageNumber, }: Props) => { const entityRegistry = useEntityRegistry(); const selectedEntityUrns = selectedEntities.map((entity) => entity.urn); @@ -86,6 +88,7 @@ export const SearchResultList = ({ entityType: result.entity.type, index, total: totalResultCount, + pageNumber, }); }; diff --git a/datahub-web-react/src/app/search/SearchResults.tsx b/datahub-web-react/src/app/search/SearchResults.tsx index dafe9a20b6ab7f..e96e8fd528b9e6 100644 --- a/datahub-web-react/src/app/search/SearchResults.tsx +++ b/datahub-web-react/src/app/search/SearchResults.tsx @@ -264,6 +264,7 @@ export const SearchResults = ({ selectedEntities={selectedEntities} setSelectedEntities={setSelectedEntities} suggestions={suggestions} + pageNumber={page} /> {totalResults > 0 && ( diff --git a/datahub-web-react/src/app/settings/SettingsPage.tsx b/datahub-web-react/src/app/settings/SettingsPage.tsx index 24bcd17ca7f9c0..e3948349546efb 100644 --- a/datahub-web-react/src/app/settings/SettingsPage.tsx +++ b/datahub-web-react/src/app/settings/SettingsPage.tsx @@ -121,7 +121,7 @@ export const SettingsPage = () => { const showViews = isViewsEnabled || false; const showOwnershipTypes = me && me?.platformPrivileges?.manageOwnershipTypes; const showHomePagePosts = me && me?.platformPrivileges?.manageGlobalAnnouncements && !readOnlyModeEnabled; - const showFeatures = true; // 
TODO: Add feature flag for this + const showFeatures = me?.platformPrivileges?.manageIngestion; // TODO: Add feature flag for this return ( diff --git a/datahub-web-react/src/app/settings/features/Feature.tsx b/datahub-web-react/src/app/settings/features/Feature.tsx index 2c090aae696f88..13453cf8f73252 100644 --- a/datahub-web-react/src/app/settings/features/Feature.tsx +++ b/datahub-web-react/src/app/settings/features/Feature.tsx @@ -104,6 +104,8 @@ export interface FeatureType { title: string; description: string; isAvailable: boolean; + isDisabled: boolean; + disabledMessage?: string; checked: boolean; onChange?: (checked: boolean) => void; }>; @@ -134,22 +136,6 @@ export const Feature = ({ key, title, description, settings, options, isNew, lea - {settings.map((option) => ( - <> - - - - {option.title} - - - - - - - - ))} {options.map((option, index) => ( <> @@ -165,15 +151,34 @@ export const Feature = ({ key, title, description, settings, options, isNew, lea {option.description} - (option.onChange ? option.onChange(checked) : null)} - disabled={!option.isAvailable} - /> + + (option.onChange ? 
option.onChange(checked) : null)} + disabled={!option.isAvailable || option.isDisabled} + /> + {index !== options.length - 1 && } ))} + {settings.map((option) => ( + <> + + + + {option.title} + Only available on DataHub Cloud + + + + + + + + ))} ); diff --git a/datahub-web-react/src/app/settings/features/Features.tsx b/datahub-web-react/src/app/settings/features/Features.tsx index ee8d7c628c1eff..1d0a0bb469cf86 100644 --- a/datahub-web-react/src/app/settings/features/Features.tsx +++ b/datahub-web-react/src/app/settings/features/Features.tsx @@ -73,18 +73,23 @@ export const Features = () => { setIsColPropagateChecked(checked); updateDocPropagation(checked); }, + isDisabled: false, + disabledMessage: undefined, }, { key: uuidv4(), title: 'Asset Level Propagation', description: 'Propagate new documentation from upstream to downstream assets based on data lineage relationships.', - isAvailable: false, checked: false, + onChange: (_: boolean) => null, + isAvailable: true, + isDisabled: true, + disabledMessage: 'Coming soon!', }, ], isNew: true, - learnMoreLink: 'https://datahubproject.io/docs/automations/doc-propagation', + learnMoreLink: 'https://datahubproject.io/docs/automations/docs-propagation', }, ]; diff --git a/docker/datahub-frontend/Dockerfile b/docker/datahub-frontend/Dockerfile index 2a9354cbf6a04f..89974e56575b07 100644 --- a/docker/datahub-frontend/Dockerfile +++ b/docker/datahub-frontend/Dockerfile @@ -25,7 +25,7 @@ RUN apk --no-cache --update-cache --available upgrade \ ENV LD_LIBRARY_PATH="/lib:/lib64" -FROM base as unpack +FROM base AS unpack COPY ./datahub-frontend.zip / RUN unzip datahub-frontend.zip -d /tmp/out \ @@ -33,16 +33,16 @@ RUN unzip datahub-frontend.zip -d /tmp/out \ COPY ./docker/monitoring/client-prometheus-config.yaml /datahub-frontend/ RUN chown -R datahub:datahub /datahub-frontend && chmod 755 /datahub-frontend -FROM base as prod-install +FROM base AS prod-install COPY --from=unpack /datahub-frontend/ /datahub-frontend/ -FROM base as 
dev-install +FROM base AS dev-install # Dummy stage for development. Assumes code is built on your machine and mounted to this image. # See this excellent thread https://github.com/docker/cli/issues/1134 VOLUME [ "/datahub-frontend" ] -FROM ${APP_ENV}-install as final +FROM ${APP_ENV}-install AS final COPY --chown=datahub:datahub --chmod=755 ./docker/datahub-frontend/start.sh / USER datahub diff --git a/docker/datahub-frontend/env/docker.env b/docker/datahub-frontend/env/docker.env index 655efb4b202bc0..7e490813c93cfd 100644 --- a/docker/datahub-frontend/env/docker.env +++ b/docker/datahub-frontend/env/docker.env @@ -22,9 +22,9 @@ JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf # Uncomment & populate these configs to enable OIDC SSO in React application. # Required OIDC configs # AUTH_OIDC_ENABLED=true -# AUTH_OIDC_CLIENT_ID=1030786188615-rr9ics9gl8n4acngj9opqbf2mruflqpr.apps.googleusercontent.com -# AUTH_OIDC_CLIENT_SECRET=acEdaGcnfd7KxvsXRFDD7FNF -# AUTH_OIDC_DISCOVERY_URI=https://accounts.google.com/.well-known/openid-configuration +# AUTH_OIDC_CLIENT_ID= +# AUTH_OIDC_CLIENT_SECRET= +# AUTH_OIDC_DISCOVERY_URI=https:///.well-known/openid-configuration # AUTH_OIDC_BASE_URL=http://localhost:9001 # Optional OIDC configs # AUTH_OIDC_USER_NAME_CLAIM=email @@ -68,4 +68,4 @@ ELASTIC_CLIENT_PORT=9200 # To use simple username/password authentication to Elasticsearch over HTTPS # set ELASTIC_CLIENT_USE_SSL=true and uncomment: # ELASTIC_CLIENT_USERNAME= -# ELASTIC_CLIENT_PASSWORD= \ No newline at end of file +# ELASTIC_CLIENT_PASSWORD= diff --git a/docker/datahub-gms/Dockerfile b/docker/datahub-gms/Dockerfile index d30dbd84930578..b15bf3c6f9f17b 100644 --- a/docker/datahub-gms/Dockerfile +++ b/docker/datahub-gms/Dockerfile @@ -11,7 +11,7 @@ FROM golang:1-alpine3.20 AS binary # Re-declaring arg from above to make it available in this stage (will inherit default value) ARG ALPINE_REPO_URL -ENV DOCKERIZE_VERSION v0.6.1 +ENV 
DOCKERIZE_VERSION=v0.6.1 WORKDIR /go/src/github.com/jwilder # Optionally set corporate mirror for apk @@ -52,7 +52,7 @@ COPY --from=binary /go/bin/dockerize /usr/local/bin ENV LD_LIBRARY_PATH="/lib:/lib64" -FROM base as prod-install +FROM base AS prod-install COPY war.war /datahub/datahub-gms/bin/war.war COPY metadata-models/src/main/resources/entity-registry.yml /datahub/datahub-gms/resources/entity-registry.yml COPY docker/datahub-gms/start.sh /datahub/datahub-gms/scripts/start.sh @@ -61,11 +61,11 @@ COPY docker/datahub-gms/jetty-jmx.xml /datahub/datahub-gms/scripts/jetty-jmx.xml COPY docker/monitoring/client-prometheus-config.yaml /datahub/datahub-gms/scripts/prometheus-config.yaml RUN chmod +x /datahub/datahub-gms/scripts/start.sh -FROM base as dev-install +FROM base AS dev-install # Dummy stage for development. Assumes code is built on your machine and mounted to this image. # See this excellent thread https://github.com/docker/cli/issues/1134 -FROM ${APP_ENV}-install as final +FROM ${APP_ENV}-install AS final RUN mkdir -p /etc/datahub/plugins/auth/resources diff --git a/docker/datahub-gms/env/docker-without-neo4j.env b/docker/datahub-gms/env/docker-without-neo4j.env index 37b7ba1797af5b..cc0dd6b4278b56 100644 --- a/docker/datahub-gms/env/docker-without-neo4j.env +++ b/docker/datahub-gms/env/docker-without-neo4j.env @@ -23,8 +23,6 @@ PE_CONSUMER_ENABLED=true UI_INGESTION_ENABLED=true ENTITY_SERVICE_ENABLE_RETENTION=true -ELASTIC_ID_HASH_ALGO=MD5 - # Uncomment to disable persistence of client-side analytics events # DATAHUB_ANALYTICS_ENABLED=false diff --git a/docker/datahub-gms/env/docker.env b/docker/datahub-gms/env/docker.env index 0ecaa32c4cb123..59fc4bdde02ff4 100644 --- a/docker/datahub-gms/env/docker.env +++ b/docker/datahub-gms/env/docker.env @@ -27,8 +27,6 @@ MCE_CONSUMER_ENABLED=true PE_CONSUMER_ENABLED=true UI_INGESTION_ENABLED=true -ELASTIC_ID_HASH_ALGO=MD5 - # Uncomment to enable Metadata Service Authentication METADATA_SERVICE_AUTH_ENABLED=false 
diff --git a/docker/datahub-ingestion-base/Dockerfile b/docker/datahub-ingestion-base/Dockerfile index 8a238c32704bb6..a2686ee8b6557f 100644 --- a/docker/datahub-ingestion-base/Dockerfile +++ b/docker/datahub-ingestion-base/Dockerfile @@ -7,28 +7,13 @@ ARG GITHUB_REPO_URL=https://github.com ARG DEBIAN_REPO_URL=https://deb.debian.org/debian ARG PIP_MIRROR_URL=https://pypi.python.org/simple -FROM golang:1-alpine3.20 AS dockerize-binary +FROM powerman/dockerize:0.19 as dockerize-binary -# Re-declaring arg from above to make it available in this stage (will inherit default value) -ARG ALPINE_REPO_URL - -ENV DOCKERIZE_VERSION v0.6.1 -WORKDIR /go/src/github.com/jwilder - -# Optionally set corporate mirror for apk -RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi - -RUN apk --no-cache --update add openssl git tar curl - -WORKDIR /go/src/github.com/jwilder/dockerize - -RUN go install github.com/jwilder/dockerize@$DOCKERIZE_VERSION - -FROM python:3.10 as base +FROM python:3.10 AS base ARG GITHUB_REPO_URL -ENV DEBIAN_FRONTEND noninteractive +ENV DEBIAN_FRONTEND=noninteractive # Optionally set corporate mirror for deb ARG DEBIAN_REPO_URL @@ -56,8 +41,7 @@ RUN apt-get update && apt-get install -y -qq \ && python -m pip install --no-cache --upgrade pip uv>=0.1.10 wheel setuptools \ && rm -rf /var/lib/apt/lists/* /var/cache/apk/* -# compiled against newer golang for security fixes -COPY --from=dockerize-binary /go/bin/dockerize /usr/local/bin +COPY --from=dockerize-binary /usr/local/bin/dockerize /usr/local/bin COPY ./docker/datahub-ingestion-base/base-requirements.txt requirements.txt COPY ./docker/datahub-ingestion-base/entrypoint.sh /entrypoint.sh @@ -75,7 +59,7 @@ RUN python3 -m venv $VIRTUAL_ENV && \ ENTRYPOINT [ "/entrypoint.sh" ] -FROM ${BASE_IMAGE} as full-install +FROM ${BASE_IMAGE} AS full-install USER 0 RUN apt-get update && apt-get install 
-y -qq \ @@ -102,7 +86,7 @@ RUN if [ $(arch) = "x86_64" ]; then \ USER datahub -FROM ${BASE_IMAGE} as slim-install +FROM ${BASE_IMAGE} AS slim-install # Do nothing else on top of base FROM ${APP_ENV}-install diff --git a/docker/datahub-ingestion/Dockerfile b/docker/datahub-ingestion/Dockerfile index b8eda548491224..34ac6ae9eba584 100644 --- a/docker/datahub-ingestion/Dockerfile +++ b/docker/datahub-ingestion/Dockerfile @@ -5,7 +5,7 @@ ARG DOCKER_VERSION=head-full ARG DEBIAN_REPO_URL=https://deb.debian.org/debian ARG PIP_MIRROR_URL=https://pypi.python.org/simple -FROM $BASE_IMAGE:$DOCKER_VERSION as base +FROM $BASE_IMAGE:$DOCKER_VERSION AS base # Optionally set corporate mirror for deb USER 0 @@ -28,11 +28,11 @@ RUN sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEAS cat src/datahub/__init__.py | grep __version__ && \ cat airflow-plugin/src/datahub_airflow_plugin/__init__.py | grep __version__ -FROM base as slim-install +FROM base AS slim-install RUN uv pip install --no-cache -e ".[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,hive,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary]" -FROM base as full-install-build +FROM base AS full-install-build USER 0 RUN apt-get update && apt-get install -y -qq maven @@ -44,14 +44,14 @@ RUN uv pip install --no-cache -e ".[base,all]" "./airflow-plugin[plugin-v2]" && datahub --version RUN ./pyspark_jars.sh -FROM base as full-install +FROM base AS full-install COPY --from=full-install-build ${VIRTUAL_ENV} ${VIRTUAL_ENV} -FROM base as dev-install +FROM base AS dev-install # Dummy stage for development. Assumes code is built on your machine and mounted to this image. 
# See this excellent thread https://github.com/docker/cli/issues/1134 -FROM ${APP_ENV}-install as final +FROM ${APP_ENV}-install AS final USER datahub diff --git a/docker/datahub-mae-consumer/Dockerfile b/docker/datahub-mae-consumer/Dockerfile index 0ee55821f2579f..6edaa29ee1a8bb 100644 --- a/docker/datahub-mae-consumer/Dockerfile +++ b/docker/datahub-mae-consumer/Dockerfile @@ -11,7 +11,7 @@ FROM golang:1-alpine3.20 AS binary # Re-declaring arg from above to make it available in this stage (will inherit default value) ARG ALPINE_REPO_URL -ENV DOCKERIZE_VERSION v0.6.1 +ENV DOCKERIZE_VERSION=v0.6.1 WORKDIR /go/src/github.com/jwilder # Optionally set corporate mirror for apk @@ -47,18 +47,18 @@ COPY --from=binary /go/bin/dockerize /usr/local/bin ENV LD_LIBRARY_PATH="/lib:/lib64" -FROM base as prod-install +FROM base AS prod-install COPY mae-consumer-job.jar /datahub/datahub-mae-consumer/bin/ COPY metadata-models/src/main/resources/entity-registry.yml /datahub/datahub-mae-consumer/resources/entity-registry.yml COPY docker/datahub-mae-consumer/start.sh /datahub/datahub-mae-consumer/scripts/ COPY docker/monitoring/client-prometheus-config.yaml /datahub/datahub-mae-consumer/scripts/prometheus-config.yaml RUN chmod +x /datahub/datahub-mae-consumer/scripts/start.sh -FROM base as dev-install +FROM base AS dev-install # Dummy stage for development. Assumes code is built on your machine and mounted to this image. 
# See this excellent thread https://github.com/docker/cli/issues/1134 -FROM ${APP_ENV}-install as final +FROM ${APP_ENV}-install AS final RUN addgroup -S datahub && adduser -S datahub -G datahub USER datahub diff --git a/docker/datahub-mae-consumer/env/docker-without-neo4j.env b/docker/datahub-mae-consumer/env/docker-without-neo4j.env index 6a82f235b29711..b6899f7e6d63b2 100644 --- a/docker/datahub-mae-consumer/env/docker-without-neo4j.env +++ b/docker/datahub-mae-consumer/env/docker-without-neo4j.env @@ -13,8 +13,6 @@ ES_BULK_REFRESH_POLICY=WAIT_UNTIL GRAPH_SERVICE_IMPL=elasticsearch ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml -ELASTIC_ID_HASH_ALGO=MD5 - # Uncomment to disable persistence of client-side analytics events # DATAHUB_ANALYTICS_ENABLED=false diff --git a/docker/datahub-mae-consumer/env/docker.env b/docker/datahub-mae-consumer/env/docker.env index 1f0ee4b05b3820..5a6daa6eaeaed7 100644 --- a/docker/datahub-mae-consumer/env/docker.env +++ b/docker/datahub-mae-consumer/env/docker.env @@ -17,8 +17,6 @@ NEO4J_PASSWORD=datahub GRAPH_SERVICE_IMPL=neo4j ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml -ELASTIC_ID_HASH_ALGO=MD5 - # Uncomment to disable persistence of client-side analytics events # DATAHUB_ANALYTICS_ENABLED=false diff --git a/docker/datahub-mce-consumer/Dockerfile b/docker/datahub-mce-consumer/Dockerfile index 8f85b432a10711..1eb56633c561e6 100644 --- a/docker/datahub-mce-consumer/Dockerfile +++ b/docker/datahub-mce-consumer/Dockerfile @@ -11,7 +11,7 @@ FROM golang:1-alpine3.20 AS binary # Re-declaring arg from above to make it available in this stage (will inherit default value) ARG ALPINE_REPO_URL -ENV DOCKERIZE_VERSION v0.6.1 +ENV DOCKERIZE_VERSION=v0.6.1 WORKDIR /go/src/github.com/jwilder # Optionally set corporate mirror for apk @@ -45,7 +45,7 @@ RUN apk --no-cache --update-cache --available upgrade \ && cp /usr/lib/jvm/java-17-openjdk/jre/lib/security/cacerts 
/tmp/kafka.client.truststore.jks COPY --from=binary /go/bin/dockerize /usr/local/bin -FROM base as prod-install +FROM base AS prod-install COPY mce-consumer-job.jar /datahub/datahub-mce-consumer/bin/ COPY metadata-models/src/main/resources/entity-registry.yml /datahub/datahub-mce-consumer/resources/entity-registry.yml COPY docker/datahub-mce-consumer/start.sh /datahub/datahub-mce-consumer/scripts/ @@ -54,12 +54,12 @@ RUN chmod +x /datahub/datahub-mce-consumer/scripts/start.sh ENV LD_LIBRARY_PATH="/lib:/lib64" -FROM base as dev-install +FROM base AS dev-install # Dummy stage for development. Assumes code is built on your machine and mounted to this image. # See this excellent thread https://github.com/docker/cli/issues/1134 COPY metadata-models/src/main/resources/entity-registry.yml /datahub/datahub-mce-consumer/resources/entity-registry.yml -FROM ${APP_ENV}-install as final +FROM ${APP_ENV}-install AS final RUN addgroup -S datahub && adduser -S datahub -G datahub USER datahub diff --git a/docker/datahub-mce-consumer/env/docker-without-neo4j.env b/docker/datahub-mce-consumer/env/docker-without-neo4j.env index b0edfc0a75b669..e7be7d8ed4ddc5 100644 --- a/docker/datahub-mce-consumer/env/docker-without-neo4j.env +++ b/docker/datahub-mce-consumer/env/docker-without-neo4j.env @@ -24,8 +24,6 @@ MAE_CONSUMER_ENABLED=false PE_CONSUMER_ENABLED=false UI_INGESTION_ENABLED=false -ELASTIC_ID_HASH_ALGO=MD5 - # Uncomment to configure kafka topic names # Make sure these names are consistent across the whole deployment # METADATA_CHANGE_PROPOSAL_TOPIC_NAME=MetadataChangeProposal_v1 diff --git a/docker/datahub-mce-consumer/env/docker.env b/docker/datahub-mce-consumer/env/docker.env index c0f85ef667546e..8618f3f5f7af7a 100644 --- a/docker/datahub-mce-consumer/env/docker.env +++ b/docker/datahub-mce-consumer/env/docker.env @@ -24,8 +24,6 @@ MAE_CONSUMER_ENABLED=false PE_CONSUMER_ENABLED=false UI_INGESTION_ENABLED=false -ELASTIC_ID_HASH_ALGO=MD5 - # Uncomment to configure kafka topic 
names # Make sure these names are consistent across the whole deployment # METADATA_CHANGE_PROPOSAL_TOPIC_NAME=MetadataChangeProposal_v1 diff --git a/docker/datahub-upgrade/Dockerfile b/docker/datahub-upgrade/Dockerfile index 675e24ab87109e..3d59a903414b1a 100644 --- a/docker/datahub-upgrade/Dockerfile +++ b/docker/datahub-upgrade/Dockerfile @@ -11,7 +11,7 @@ FROM golang:1-alpine3.20 AS binary # Re-declaring arg from above to make it available in this stage (will inherit default value) ARG ALPINE_REPO_URL -ENV DOCKERIZE_VERSION v0.6.1 +ENV DOCKERIZE_VERSION=v0.6.1 WORKDIR /go/src/github.com/jwilder # Optionally set corporate mirror for apk @@ -51,15 +51,15 @@ COPY --from=binary /go/bin/dockerize /usr/local/bin ENV LD_LIBRARY_PATH="/lib:/lib64" -FROM base as prod-install +FROM base AS prod-install COPY datahub-upgrade.jar /datahub/datahub-upgrade/bin/ COPY metadata-models/src/main/resources/entity-registry.yml /datahub/datahub-gms/resources/entity-registry.yml -FROM base as dev-install +FROM base AS dev-install # Dummy stage for development. Assumes code is built on your machine and mounted to this image. 
# See this excellent thread https://github.com/docker/cli/issues/1134 -FROM ${APP_ENV}-install as final +FROM ${APP_ENV}-install AS final RUN addgroup -S datahub && adduser -S datahub -G datahub USER datahub diff --git a/docker/elasticsearch-setup/Dockerfile b/docker/elasticsearch-setup/Dockerfile index 7390e3579dcf8f..4e64dcbc1e452c 100644 --- a/docker/elasticsearch-setup/Dockerfile +++ b/docker/elasticsearch-setup/Dockerfile @@ -10,7 +10,7 @@ FROM golang:1-alpine3.20 AS binary ARG ALPINE_REPO_URL -ENV DOCKERIZE_VERSION v0.6.1 +ENV DOCKERIZE_VERSION=v0.6.1 WORKDIR /go/src/github.com/jwilder # Optionally set corporate mirror for apk diff --git a/docker/kafka-setup/Dockerfile b/docker/kafka-setup/Dockerfile index a68da4e41d4df9..dd88060cd7165f 100644 --- a/docker/kafka-setup/Dockerfile +++ b/docker/kafka-setup/Dockerfile @@ -1,4 +1,4 @@ -ARG KAFKA_DOCKER_VERSION=7.4.4 +ARG KAFKA_DOCKER_VERSION=7.4.6 # Defining custom repo urls for use in enterprise environments. Re-used between stages below. 
ARG ALPINE_REPO_URL=http://dl-cdn.alpinelinux.org/alpine @@ -6,8 +6,8 @@ ARG GITHUB_REPO_URL=https://github.com ARG MAVEN_CENTRAL_REPO_URL=https://repo1.maven.org/maven2 ARG APACHE_DOWNLOAD_URL=null -# Using as a base image because to get the needed jars for confluent utils -FROM confluentinc/cp-base-new:$KAFKA_DOCKER_VERSION as confluent_base +# Using as a base image to get the needed jars for confluent utils +FROM confluentinc/cp-base-new:$KAFKA_DOCKER_VERSION AS confluent_base ARG MAVEN_CENTRAL_REPO_URL ARG SNAKEYAML_VERSION="2.0" @@ -22,8 +22,8 @@ ARG ALPINE_REPO_URL ARG APACHE_DOWNLOAD_URL ARG GITHUB_REPO_URL -ENV KAFKA_VERSION 3.7.0 -ENV SCALA_VERSION 2.13 +ENV KAFKA_VERSION=3.7.1 +ENV SCALA_VERSION=2.13 LABEL name="kafka" version=${KAFKA_VERSION} @@ -44,7 +44,7 @@ RUN mkdir -p /opt \ && rm -rf /tmp/* \ && apk del --purge .build-deps -ENV PATH /sbin:/opt/kafka/bin/:$PATH +ENV PATH=/sbin:/opt/kafka/bin/:$PATH WORKDIR /opt/kafka @@ -71,6 +71,7 @@ COPY docker/kafka-setup/kafka-setup.sh ./kafka-setup.sh COPY docker/kafka-setup/kafka-config.sh ./kafka-config.sh COPY docker/kafka-setup/kafka-topic-workers.sh ./kafka-topic-workers.sh COPY docker/kafka-setup/kafka-ready.sh ./kafka-ready.sh +COPY docker/kafka-setup/env_to_properties.py ./env_to_properties.py RUN chmod +x ./kafka-setup.sh ./kafka-topic-workers.sh ./kafka-ready.sh diff --git a/docker/mysql-setup/Dockerfile b/docker/mysql-setup/Dockerfile index 46969352d81746..b0ca45ad8f6f24 100644 --- a/docker/mysql-setup/Dockerfile +++ b/docker/mysql-setup/Dockerfile @@ -5,7 +5,7 @@ FROM golang:1-alpine3.20 AS binary ARG ALPINE_REPO_URL -ENV DOCKERIZE_VERSION v0.6.1 +ENV DOCKERIZE_VERSION=v0.6.1 WORKDIR /go/src/github.com/jwilder # Optionally set corporate mirror for apk diff --git a/docker/postgres-setup/Dockerfile b/docker/postgres-setup/Dockerfile index 8ab211218f2406..e145456e807d4d 100644 --- a/docker/postgres-setup/Dockerfile +++ b/docker/postgres-setup/Dockerfile @@ -5,7 +5,7 @@ FROM golang:1-alpine3.20
AS binary ARG ALPINE_REPO_URL -ENV DOCKERIZE_VERSION v0.6.1 +ENV DOCKERIZE_VERSION=v0.6.1 WORKDIR /go/src/github.com/jwilder # Optionally set corporate mirror for apk diff --git a/docker/profiles/docker-compose.frontend.yml b/docker/profiles/docker-compose.frontend.yml index b43db8297cb1e0..b5b2d50143927f 100644 --- a/docker/profiles/docker-compose.frontend.yml +++ b/docker/profiles/docker-compose.frontend.yml @@ -10,6 +10,7 @@ x-datahub-frontend-service: &datahub-frontend-service - ${DATAHUB_LOCAL_FRONTEND_ENV:-empty2.env} environment: &datahub-frontend-service-env KAFKA_BOOTSTRAP_SERVER: broker:29092 + DATAHUB_GMS_HOST: ${DATAHUB_GMS_HOST:-datahub-gms} volumes: - ${HOME}/.datahub/plugins:/etc/datahub/plugins diff --git a/docker/profiles/docker-compose.gms.yml b/docker/profiles/docker-compose.gms.yml index 8cfff2280e2fea..c9448fa34c6870 100644 --- a/docker/profiles/docker-compose.gms.yml +++ b/docker/profiles/docker-compose.gms.yml @@ -40,6 +40,7 @@ x-kafka-env: &kafka-env # KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 SCHEMA_REGISTRY_TYPE: INTERNAL KAFKA_SCHEMAREGISTRY_URL: http://datahub-gms:8080/schema-registry/api/ + SPRING_KAFKA_CONSUMER_AUTO_OFFSET_RESET: ${SPRING_KAFKA_CONSUMER_AUTO_OFFSET_RESET:-earliest} x-datahub-quickstart-telemetry-env: &datahub-quickstart-telemetry-env DATAHUB_SERVER_TYPE: ${DATAHUB_SERVER_TYPE:-quickstart} diff --git a/docker/profiles/docker-compose.prerequisites.yml b/docker/profiles/docker-compose.prerequisites.yml index 7cd9c9039539cc..eed23a749628fe 100644 --- a/docker/profiles/docker-compose.prerequisites.yml +++ b/docker/profiles/docker-compose.prerequisites.yml @@ -234,7 +234,7 @@ services: env_file: kafka-broker/env/docker.env environment: KAFKA_NODE_ID: 1 - KAFKA_ADVERTISED_LISTENERS: BROKER://broker:29092,EXTERNAL://broker:9092 + KAFKA_ADVERTISED_LISTENERS: BROKER://broker:29092,EXTERNAL://localhost:9092 KAFKA_LISTENERS: BROKER://broker:29092,EXTERNAL://broker:9092,CONTROLLER://broker:39092 
KAFKA_INTER_BROKER_LISTENER_NAME: BROKER KAFKA_CONTROLLER_LISTENER_NAMES: CONTROLLER diff --git a/docker/quickstart/docker-compose-m1.quickstart.yml b/docker/quickstart/docker-compose-m1.quickstart.yml index a0f60d23710a07..834d55096468f6 100644 --- a/docker/quickstart/docker-compose-m1.quickstart.yml +++ b/docker/quickstart/docker-compose-m1.quickstart.yml @@ -86,7 +86,6 @@ services: - ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true - ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true - ELASTICSEARCH_PORT=9200 - - ELASTIC_ID_HASH_ALGO=MD5 - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml - ENTITY_SERVICE_ENABLE_RETENTION=true - ES_BULK_REFRESH_POLICY=WAIT_UNTIL diff --git a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml index 11e33a9950ba9b..47fb50f78e4f0c 100644 --- a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml @@ -86,7 +86,6 @@ services: - ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true - ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true - ELASTICSEARCH_PORT=9200 - - ELASTIC_ID_HASH_ALGO=MD5 - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml - ENTITY_SERVICE_ENABLE_RETENTION=true - ES_BULK_REFRESH_POLICY=WAIT_UNTIL diff --git a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml index 2efa8959834183..3fa13a9e56b421 100644 --- a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml @@ -86,7 +86,6 @@ services: - ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true - ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true - ELASTICSEARCH_PORT=9200 - - ELASTIC_ID_HASH_ALGO=MD5 - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml - 
ENTITY_SERVICE_ENABLE_RETENTION=true - ES_BULK_REFRESH_POLICY=WAIT_UNTIL diff --git a/docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml index 4f47a3da24eb1b..a4211acedcf102 100644 --- a/docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml +++ b/docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml @@ -19,7 +19,6 @@ services: - ES_BULK_REFRESH_POLICY=WAIT_UNTIL - GRAPH_SERVICE_IMPL=elasticsearch - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml - - ELASTIC_ID_HASH_ALGO=MD5 hostname: datahub-mae-consumer image: ${DATAHUB_MAE_CONSUMER_IMAGE:-acryldata/datahub-mae-consumer}:${DATAHUB_VERSION:-head} ports: @@ -38,7 +37,6 @@ services: - EBEAN_DATASOURCE_USERNAME=datahub - ELASTICSEARCH_HOST=elasticsearch - ELASTICSEARCH_PORT=9200 - - ELASTIC_ID_HASH_ALGO=MD5 - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mce-consumer/resources/entity-registry.yml - ENTITY_SERVICE_ENABLE_RETENTION=true - ES_BULK_REFRESH_POLICY=WAIT_UNTIL diff --git a/docker/quickstart/docker-compose.consumers.quickstart.yml b/docker/quickstart/docker-compose.consumers.quickstart.yml index 7dd7388b939884..e7571e4baf8b4e 100644 --- a/docker/quickstart/docker-compose.consumers.quickstart.yml +++ b/docker/quickstart/docker-compose.consumers.quickstart.yml @@ -26,7 +26,6 @@ services: - NEO4J_PASSWORD=datahub - GRAPH_SERVICE_IMPL=neo4j - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml - - ELASTIC_ID_HASH_ALGO=MD5 hostname: datahub-mae-consumer image: ${DATAHUB_MAE_CONSUMER_IMAGE:-acryldata/datahub-mae-consumer}:${DATAHUB_VERSION:-head} ports: @@ -48,7 +47,6 @@ services: - EBEAN_DATASOURCE_USERNAME=datahub - ELASTICSEARCH_HOST=elasticsearch - ELASTICSEARCH_PORT=9200 - - ELASTIC_ID_HASH_ALGO=MD5 - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mce-consumer/resources/entity-registry.yml - 
ENTITY_SERVICE_ENABLE_RETENTION=true - ES_BULK_REFRESH_POLICY=WAIT_UNTIL diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml index f42ed1f40c2467..c63b6d1d61b030 100644 --- a/docker/quickstart/docker-compose.quickstart.yml +++ b/docker/quickstart/docker-compose.quickstart.yml @@ -86,7 +86,6 @@ services: - ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true - ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true - ELASTICSEARCH_PORT=9200 - - ELASTIC_ID_HASH_ALGO=MD5 - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml - ENTITY_SERVICE_ENABLE_RETENTION=true - ES_BULK_REFRESH_POLICY=WAIT_UNTIL diff --git a/docs-website/build.gradle b/docs-website/build.gradle index 798047a562ffd2..803112bf857166 100644 --- a/docs-website/build.gradle +++ b/docs-website/build.gradle @@ -86,6 +86,7 @@ task yarnGenerate(type: YarnTask, dependsOn: [yarnInstall, ':metadata-ingestion:buildWheel', ':metadata-ingestion-modules:airflow-plugin:buildWheel', ':metadata-ingestion-modules:dagster-plugin:buildWheel', + ':metadata-ingestion-modules:gx-plugin:buildWheel', ]) { inputs.files(projectMdFiles) outputs.cacheIf { true } diff --git a/docs-website/download_historical_versions.py b/docs-website/download_historical_versions.py index 7493210ffa2a5f..0998d8d9972624 100644 --- a/docs-website/download_historical_versions.py +++ b/docs-website/download_historical_versions.py @@ -3,6 +3,7 @@ import tarfile import time import urllib.request +import shutil repo_url = "https://api.github.com/repos/datahub-project/static-assets" @@ -18,7 +19,7 @@ def download_file(url, destination): def fetch_urls( - repo_url: str, folder_path: str, file_format: str, max_retries=3, retry_delay=5 + repo_url: str, folder_path: str, file_format: str, active_versions: list, max_retries=3, retry_delay=5 ): api_url = f"{repo_url}/contents/{folder_path}" for attempt in range(max_retries + 1): @@ -30,7 +31,7 @@ def fetch_urls( urls = [ 
file["download_url"] for file in json.loads(data) - if file["name"].endswith(file_format) + if file["name"].endswith(file_format) and any(version in file["name"] for version in active_versions) ] print(urls) return urls @@ -48,12 +49,22 @@ def extract_tar_file(destination_path): tar.extractall() os.remove(destination_path) +def get_active_versions(): + # read versions.json + with open("versions.json") as f: + versions = json.load(f) + return versions + +def clear_directory(directory): + if os.path.exists(directory): + shutil.rmtree(directory) + os.makedirs(directory) def download_versioned_docs(folder_path: str, destination_dir: str, file_format: str): - if not os.path.exists(destination_dir): - os.makedirs(destination_dir) + clear_directory(destination_dir) # Clear the directory before downloading - urls = fetch_urls(repo_url, folder_path, file_format) + active_versions = get_active_versions() + urls = fetch_urls(repo_url, folder_path, file_format, active_versions) for url in urls: filename = os.path.basename(url) diff --git a/docs-website/generateDocsDir.ts b/docs-website/generateDocsDir.ts index 23888d9000161d..ceac79bd5cad37 100644 --- a/docs-website/generateDocsDir.ts +++ b/docs-website/generateDocsDir.ts @@ -573,6 +573,7 @@ function copy_python_wheels(): void { "../metadata-ingestion/dist", "../metadata-ingestion-modules/airflow-plugin/dist", "../metadata-ingestion-modules/dagster-plugin/dist", + "../metadata-ingestion-modules/gx-plugin/dist", ]; const wheel_output_directory = path.join(STATIC_DIRECTORY, "wheels"); diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index a3aa54657d0675..076bc5aa3bf188 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -98,6 +98,23 @@ module.exports = { }, ], }, + { + label: "Automations", + type: "category", + items: [ + { + label: "Documentation Propagation", + type: "doc", + id: "docs/automation/docs-propagation", + }, + { + label: "Snowflake Tag Sync", + type: "doc", + id: 
"docs/automation/snowflake-tag-propagation", + className: "saasOnly", + }, + ], + }, { label: "Business Attributes", type: "doc", @@ -527,7 +544,12 @@ module.exports = { "Advanced Guides": [ "docs/how/delete-metadata", "docs/how/configuring-authorization-with-apache-ranger", - "docs/managed-datahub/configuring-identity-provisioning-with-ms-entra", + { + "SCIM Provisioning": [ + "docs/managed-datahub/configuring-identity-provisioning-with-ms-entra", + "docs/managed-datahub/configuring-identity-provisioning-with-okta", + ], + }, "docs/how/backup-datahub", "docs/how/restore-indices", "docs/advanced/db-retention", @@ -895,6 +917,7 @@ module.exports = { // "metadata-integration/java/openlineage-converter/README" //"metadata-ingestion-modules/airflow-plugin/README" //"metadata-ingestion-modules/dagster-plugin/README" + //"metadata-ingestion-modules/gx-plugin/README" // "metadata-ingestion/schedule_docs/datahub", // we can delete this // TODO: change the titles of these, removing the "What is..." portion from the sidebar" // "docs/what/entity", @@ -925,6 +948,7 @@ module.exports = { // - "metadata-service/services/README" // "metadata-ingestion/examples/structured_properties/README" // "smoke-test/tests/openapi/README" + // "docs/SECURITY_STANCE" // ], ], }; diff --git a/docs-website/versions.json b/docs-website/versions.json index afd30a317c618b..5288c42437c779 100644 --- a/docs-website/versions.json +++ b/docs-website/versions.json @@ -1,3 +1,4 @@ [ + "0.14.0", "0.13.1" ] diff --git a/docs/SECURITY_STANCE.md b/docs/SECURITY_STANCE.md new file mode 100644 index 00000000000000..a48244d741b70b --- /dev/null +++ b/docs/SECURITY_STANCE.md @@ -0,0 +1,81 @@ +# DataHub's Commitment to Security + +## Introduction + +The open-source DataHub project takes security seriously. As part of our commitment to maintaining a secure environment +for our users and contributors, we have established a comprehensive security policy. 
This document outlines the key +aspects of our approach to handling security vulnerabilities and keeping our community informed. + +## Our Track Record + +We have a proactive approach to security. To date we've successfully resolved many security related issues reported by +community members or flagged by automated scanners (which includes upstream dependencies and what known risks the +dependencies contain), demonstrating our commitment to maintaining a secure platform. This is a testament to the +collaborative efforts of our community in identifying and helping us address potential vulnerabilities. It truly takes +a village. + +## Reporting Security Issues + +If you believe you've discovered a security vulnerability in DataHub, we encourage you to report it immediately. We have +a dedicated process for handling security-related issues to ensure they're addressed promptly and discreetly. + +For detailed instructions on how to report a security vulnerability, including our PGP key for encrypted communications, +please visit our official security policy page: + +[DataHub Security Policy](https://github.com/datahub-project/datahub/security/policy) + +We kindly ask that you do not disclose the vulnerability publicly until the committers have had the chance to address it +and make an announcement. + +## Our Response Process + +Once a security issue is reported, the project follows a structured process to ensure that each report is handled with +the attention and urgency it deserves. This includes: + +1. Verifying the reported vulnerability +2. Assessing its potential impact +3. Developing and testing a fix +4. Releasing security patches +5. Coordinating the public disclosure of the vulnerability + +All reported vulnerabilities are carefully assessed and triaged internally to ensure appropriate action is taken. 
+ +## How we prioritize (and the dangers of blindly following automated scanners) + +While we appreciate the value of automated vulnerability detection systems like Dependabot, we want to emphasize the +importance of critical thinking when addressing flagged issues. These systems are excellent at providing signals of +potential vulnerabilities, but they shouldn't be followed blindly. + +Here's why: + +1. Context matters: An issue flagged might only affect a non-serving component of the stack (such as our docs-website + code or our CI smoke tests), which may not pose a significant risk to the overall system. + +2. False positives: Sometimes, these systems may flag vulnerabilities in libraries that are linked but not actively + used. For example, a vulnerability in an email library might be flagged even if the software never sends emails. + +3. Exploit feasibility: Some vulnerabilities may be technically present but extremely difficult or impractical to + exploit in real-world scenarios. Automated scanners often don't consider the actual implementation details or + security controls that might mitigate the risk. For example, a reported SQL injection vulnerability might exist in + theory, but if the application uses parameterized queries or has proper input validation in place, the actual risk + could be significantly lower than the scanner suggests. + +We carefully review all automated alerts in the context of our specific implementation to determine the actual risk and +appropriate action. + +## Keeping the Community Informed + +Transparency is key in maintaining trust within our open-source community. 
To keep everyone informed about +security-related matters: + +- We maintain Security Advisories on the DataHub project GitHub repository +- These advisories include summaries of security issues, details on the fixes implemented, and any necessary mitigation + steps for users + +## Conclusion + +Security is an ongoing process, and we're committed to continuously improving our practices. By working together with +our community of users and contributors, we aim to maintain DataHub as a secure and reliable metadata platform. + +We encourage all users to stay updated with our security announcements and to promptly apply any security patches +released. Together, we can ensure a safer environment for everyone in the DataHub community. diff --git a/docs/authorization/policies.md b/docs/authorization/policies.md index b393c8ffa37577..45d0b59e408337 100644 --- a/docs/authorization/policies.md +++ b/docs/authorization/policies.md @@ -173,12 +173,13 @@ These privileges are for DataHub operators to access & manage the administrative | View Tests | View Asset Tests. | | Manage Tests[^2] | Allow actor to create and remove Asset Tests. | | View Metadata Proposals[^2] | Allow actor to view the requests tab for viewing metadata proposals. | -| Create metadata constraints[^2] | Allow actor to create metadata constraints. | +| Create metadata constraints[^3] | Allow actor to create metadata constraints. | | Manage Platform Settings[^2] | Allow actor to view and change platform-level settings, like integrations & notifications. | | Manage Monitors[^2] | Allow actor to create, update, and delete any data asset monitors, including Custom SQL monitors. Grant with care. 
| [^1]: Only active if REST_API_AUTHORIZATION_ENABLED is true [^2]: DataHub Cloud only +[^3]: Deprecated feature #### Entity Management diff --git a/docs/authorization/roles.md b/docs/authorization/roles.md index 7c7b4581faffca..3e0666a7759c86 100644 --- a/docs/authorization/roles.md +++ b/docs/authorization/roles.md @@ -156,10 +156,12 @@ These privileges are only relevant to DataHub Cloud. |-----------------------------|--------------------|--------------------|--------|-----------------------------------------------------------------------------------------------------| | Manage Tests | :heavy_check_mark: | :heavy_check_mark: | :x: | Create and remove Asset Tests. | | View Metadata Proposals | :heavy_check_mark: | :heavy_check_mark: | :x: | View the requests tab for viewing metadata proposals. | -| Create metadata constraints | :heavy_check_mark: | :heavy_check_mark: | :x: | Create metadata constraints. | +| Create metadata constraints[^1] | :heavy_check_mark: | :heavy_check_mark: | :x: | Create metadata constraints. | | Manage Platform Settings | :heavy_check_mark: | :x: | :x: | View and change platform-level settings, like integrations & notifications. | | Manage Monitors | :heavy_check_mark: | :x: | :x: | Create, update, and delete any data asset monitors, including Custom SQL monitors. Grant with care. | +[^1]: Deprecated feature + ##### Metadata Privileges | Privilege | Admin | Editor | Reader | Description | @@ -177,6 +179,7 @@ These privileges are only relevant to DataHub Cloud. | Manage Documentation Proposals | :heavy_check_mark: | :heavy_check_mark: | :x: | The ability to manage a proposal update an asset's documentation | | Manage Group Notification Settings | :heavy_check_mark: | :heavy_check_mark: | :x: | The ability to manage notification settings for a group. | | Manage Group Subscriptions | :heavy_check_mark: | :heavy_check_mark: | :x: | The ability to manage subscriptions for a group. 
| +| Manage User Subscriptions | :heavy_check_mark: | :x: | :x: | The ability to manage subscriptions for another user. | | Manage Data Contract Proposals | :heavy_check_mark: | :heavy_check_mark: | :x: | The ability to manage a proposal for a Data Contract | | Share Entity | :heavy_check_mark: | :heavy_check_mark: | :x: | The ability to share an entity with another DataHub Cloud instance. | diff --git a/docs/automation/docs-propagation.md b/docs/automation/docs-propagation.md new file mode 100644 index 00000000000000..a637afcde4dca7 --- /dev/null +++ b/docs/automation/docs-propagation.md @@ -0,0 +1,128 @@ +# Documentation Propagation Automation + +## Introduction + +Documentation Propagation is an automation that automatically propagates column and asset (coming soon) descriptions based on downstream column-level lineage and sibling relationships. +It simplifies metadata management by ensuring consistency and reducing the manual effort required for documenting data assets to aid +in Data Governance & Compliance along with Data Discovery. + +This feature is enabled by default in Open Source DataHub. + +## Capabilities + +### Open Source +- **Column-Level Docs Propagation**: Automatically propagate documentation to downstream columns and sibling columns that are derived or dependent on the source column. +- **(Coming Soon) Asset-Level Docs Propagation**: Propagate descriptions to sibling assets. + +### DataHub Cloud (Acryl) +- Includes all the features of Open Source. +- **Propagation Rollback (Undo)**: Offers the ability to undo any propagation changes, providing a safety net against accidental updates. +- **Historical Backfilling**: Automatically backfills historical data for newly documented columns to maintain consistency across time.
+ +### Comparison of Features + +| Feature | Open Source | DataHub Cloud | +|---------------------------------|-------------|---------------| +| Column-Level Docs Propagation | ✔️ | ✔️ | +| Asset-Level Docs Propagation | ✔️ | ✔️ | +| Downstream Lineage + Siblings | ✔️ | ✔️ | +| Propagation Rollback (Undo) | ❌ | ✔️ | +| Historical Backfilling | ❌ | ✔️ | + +## Enabling Documentation Propagation + +### In Open Source + +Note that the user must have the `Manage Ingestion` permission to view and enable the feature. + +1. **Navigate to Settings**: Click on the 'Settings' gear in the top navigation bar. + +

+ +

+ +2. **Navigate to Features**: Click on the 'Features' tab in the left-hand navigation bar. + +

+ +

+ +3. **Enable Documentation Propagation**: Locate the 'Documentation Propagation' section and toggle the feature to enable it for column-level and asset-level propagation. +Currently, Column Level propagation is supported, with asset level propagation coming soon. + +

+ +

+ + +### In DataHub Cloud + +1. **Navigate to Automations**: Click on 'Govern' > 'Automations' in the navigation bar. + +

+ +

+ +2. **Create An Automation**: Click on 'Create' and select 'Column Documentation Propagation'. + +

+ +

+ +3. **Configure Automation**: Fill in the required fields, such as the name, description, and category. Finally, click 'Save and Run' to start the automation. + +

+ +

+ +## Propagating for Existing Assets (DataHub Cloud Only) + +In DataHub Cloud, you can back-fill historical data for existing assets to ensure that all existing column descriptions are propagated to downstreams +when you start the automation. Note that it may take some time to complete the initial back-filling process, depending on the number of assets and the complexity of your lineage. + +To do this, navigate to the Automation you created in Step 3 above, click the 3-dot "more" menu: + +

+ +

+ +and then click "Initialize". + +

+ +

+ +This one-time step will kick off the back-filling process for existing descriptions. If you only want to begin propagating +descriptions going forward, you can skip this step. + +## Rolling Back Propagated Descriptions (DataHub Cloud Only) + +In DataHub Cloud, you can roll back all descriptions that have been propagated historically. + +This feature allows you to "clean up" or "undo" any accidental propagation that may have occurred automatically, in the case +that you no longer want propagated descriptions to be visible. + +To do this, navigate to the Automation you created in Step 3 above, click the 3-dot "More" menu + +

+ +

+ +and then click "Rollback". + +

+ +

+ +This one-time step will remove all propagated descriptions. To simply stop propagating new descriptions, you can disable the automation. + +## Viewing Propagated Descriptions + +Once the automation is enabled, you'll be able to recognize propagated descriptions as those with the thunderbolt icon next to them: + +The tooltip will provide additional information, including where the description originated and any intermediate hops that were +used to propagate the description. + +

+ +

\ No newline at end of file diff --git a/docs/automation/snowflake-tag-propagation.md b/docs/automation/snowflake-tag-propagation.md new file mode 100644 index 00000000000000..bdc80376dfb484 --- /dev/null +++ b/docs/automation/snowflake-tag-propagation.md @@ -0,0 +1,88 @@ + +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + +# Snowflake Tag Propagation Automation + + + +## Introduction + +Snowflake Tag Propagation is an automation that allows you to sync DataHub Glossary Terms and Tags on +both columns and tables back to Snowflake. This automation is available in DataHub Cloud (Acryl) only. + +## Capabilities + +- Automatically Add DataHub Glossary Terms to Snowflake Tables and Columns +- Automatically Add DataHub Tags to Snowflake Tables and Columns +- Automatically Remove DataHub Glossary Terms and Tags from Snowflake Tables and Columns when they are removed in DataHub + +## Enabling Snowflake Tag Sync + +1. **Navigate to Automations**: Click on 'Govern' > 'Automations' in the navigation bar. + +

+ +

+ +2. **Create An Automation**: Click on 'Create' and select 'Snowflake Tag Propagation'. + +

+ +

+ +3. **Configure Automation**: Fill in the required fields to connect to Snowflake, along with the name, description, and category. +Note that you can limit propagation based on specific Tags and Glossary Terms. If none are selected, then ALL Tags or Glossary Terms will be automatically +propagated to Snowflake tables and columns. Finally, click 'Save and Run' to start the automation. + +

+ +

+ +## Propagating for Existing Assets + +You can back-fill historical data for existing assets to ensure that all existing column and table Tags and Glossary Terms are propagated to Snowflake. +Note that it may take some time to complete the initial back-filling process, depending on the number of Snowflake assets you have. + +To do so, navigate to the Automation you created in Step 3 above, click the 3-dot "More" menu + +

+ +

+ +and then click "Initialize". + +

+ +

+ +This one-time step will kick off the back-filling process for existing Tags and Glossary Terms. If you only want to begin propagating +Tags and Glossary Terms going forward, you can skip this step. + +## Rolling Back Propagated Tags + +You can roll back all tags and glossary terms that have been propagated historically. + +This feature allows you to "clean up" or "undo" any accidental propagation that may have occurred automatically, in the case +that you no longer want propagated Tags and Glossary Terms to be visible. + +To do this, navigate to the Automation you created in Step 3 above, click the 3-dot "More" menu + +

+ +

+ +and then click "Rollback". + +

+ +

+ +This one-time step will remove all propagated tags and glossary terms from Snowflake. To simply stop propagating new tags, you can disable the automation. + +## Viewing Propagated Tags + +You can view propagated Tags (and corresponding DataHub URNs) inside the Snowflake UI to confirm the automation is working as expected. + +

+ +

diff --git a/docs/deploy/confluent-cloud.md b/docs/deploy/confluent-cloud.md index 096fd9984f474d..46931de745be62 100644 --- a/docs/deploy/confluent-cloud.md +++ b/docs/deploy/confluent-cloud.md @@ -17,9 +17,8 @@ First, you'll need to create following new topics in the [Confluent Control Cent 7. (Deprecated) **MetadataAuditEvent_v4**: Metadata change log messages 8. (Deprecated) **FailedMetadataChangeEvent_v4**: Failed to process #1 event 9. **MetadataGraphEvent_v4**: -10. **MetadataGraphEvent_v4**: -11. **PlatformEvent_v1** -12. **DataHubUpgradeHistory_v1**: Notifies the end of DataHub Upgrade job so dependants can act accordingly (_eg_, startup). +10. **PlatformEvent_v1** +11. **DataHubUpgradeHistory_v1**: Notifies the end of DataHub Upgrade job so dependants can act accordingly (_eg_, startup). Note this topic requires special configuration: **Infinite retention**. Also, 1 partition is enough for the occasional traffic. The first five are the most important, and are explained in more depth in [MCP/MCL](../advanced/mcp-mcl.md). The final topics are @@ -243,4 +242,4 @@ Accepting contributions for a setup script compatible with Confluent Cloud! The kafka-setup-job container we ship with is only compatible with a distribution of Kafka wherein ZooKeeper is exposed and available. A version of the job using the [Confluent CLI](https://docs.confluent.io/confluent-cli/current/command-reference/kafka/topic/confluent_kafka_topic_create.html) -would be very useful for the broader community. \ No newline at end of file +would be very useful for the broader community. diff --git a/docs/developers.md b/docs/developers.md index 0c9d7bee3d79f2..401169490dd4b6 100644 --- a/docs/developers.md +++ b/docs/developers.md @@ -46,7 +46,7 @@ Use [gradle wrapper](https://docs.gradle.org/current/userguide/gradle_wrapper.ht ./gradlew build ``` -Note that the above will also run run tests and a number of validations which makes the process considerably slower. 
+Note that the above will also run tests and a number of validations which makes the process considerably slower. We suggest partially compiling DataHub according to your needs: diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 08ababcb5cfce9..2443375099b7b2 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -20,6 +20,16 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ### Breaking Changes +### Potential Downtime + +### Deprecations + +### Other Notable Changes + +## 0.14.0 + +### Breaking Changes + - Protobuf CLI will no longer create binary encoded protoc custom properties. Flag added `-protocProp` in case this behavior is required. - #10814 Data flow info and data job info aspect will produce an additional field that will require a corresponding upgrade of server. Otherwise server can reject the aspects. diff --git a/docs/managed-datahub/configuring-identity-provisioning-with-okta.md b/docs/managed-datahub/configuring-identity-provisioning-with-okta.md new file mode 100644 index 00000000000000..a7939b514166da --- /dev/null +++ b/docs/managed-datahub/configuring-identity-provisioning-with-okta.md @@ -0,0 +1,119 @@ +--- +title: "SCIM Integration: Okta and DataHub" +hide_title: true +--- +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + +## SCIM Integration: Okta and DataHub + + +## Overview +This document covers the steps required to enable SCIM provisioning from Okta to DataHub. + +This document assumes you are using OIDC for SSO with DataHub. +Since Okta doesn't currently support SCIM with OIDC, you would need to create an additional SWA-app-integration to enable SCIM provisioning. + +On completing the steps in this guide, Okta will start automatically pushing changes to users/groups of this SWA-app-integration to DataHub, thereby simplifying provisioning of users/groups in DataHub. + +### Why SCIM provisioning? 
+Let us look at an example of the flows enabled through SCIM provisioning. + +Consider the following configuration in Okta +- A group `governance-team` +- And it has two members `john` and `sid` +- And the group has role `Reader` + +Through SCIM provisioning, the following are enabled: +* If the `governance-team` group is assigned to the DataHub app in Okta with the role `Reader`, Okta will create the users `john` and `sid` in DataHub with the `Reader` role. +* If you remove `john` from group `governance-team` then `john` would automatically get deactivated in DataHub. +* If you remove `sid` from the DataHub app in Okta, then `sid` would automatically get deactivated in DataHub. + +Generally, any user assignment/unassignment to the app in Okta - directly or through groups - are automatically reflected in the DataHub application. + +This guide also covers other variations such as how to assign a role to a user directly, and how group-information can be pushed to DataHub. + +> Only Admin, Editor and Reader roles are supported in DataHub. These roles are preconfigured/created on DataHub. + +## Configuring SCIM provisioning + +### 1. Create an SWA app integration +a). Create a new [SWA app integration](https://help.okta.com/en-us/content/topics/apps/apps_app_integration_wizard_swa.htm), called say, `DataHub-SCIM-SWA`. + +Note: this app-integration will only be used for SCIM provisioning. You would continue to use the existing OIDC-app-integration for SSO. + +b). In the `General` tab of the `DataHub-SCIM-SWA` application, check the `Enable SCIM provisioning` option + +

+ +

+ +You may also want to configure the other selections as shown in the above image, so that this application isn't visible to your users. + +### 2. Configure SCIM + +a). Generate a personal access token from [DataHub](../../docs/authentication/personal-access-tokens.md#creating-personal-access-tokens). + +b). In the `Provisioning` tab, configure the DataHub-SCIM endpoint as shown in the below image: + +

+ +

+ +**Note**: Set the value of the `Bearer` field to the personal access token obtained in step (a) above. + +c). Configure the `To App` section as shown below: + +

+ +

+ +**Note**: We are not pushing passwords to DataHub over SCIM, since we are assuming SSO with OIDC as mentioned earlier. + +### 3. Add a custom attribute to represent roles +a). Navigate to `Directory` -> `Profile Editor`, and select the user-profile of this new application. + +

+ +

+ +b). Click `Add Attribute` and define a new attribute that will be used to specify the role of a DataHub user. + +

+ +

+ +* Set value of `External name` to `roles.^[primary==true].value` +* Set value of `External namespace` to `urn:ietf:params:scim:schemas:core:2.0:User` +* Define an enumerated list of values as shown in the above image +* Mark this attribute as required +* Select `Attribute type` as `Personal` + +c). Add a similar attribute for groups i.e. repeat step (b) above, but select `Attribute Type` as `Group`. (Specify the variable name as, say, `dataHubGroupRoles`.) + +### 4. Assign users & groups to the app +Assign users and groups to the app from the `Assignments` tab: + +

+ +

+ +While assigning a user/group, choose an appropriate value for the dataHubRoles/dataHubGroupRoles attribute. +Note that when a role is selected for a group, the corresponding role is pushed for all users of that group in DataHub. + +### The provisioning setup is now complete +Once the above steps are completed, user assignments/unassignments to the DataHub-SCIM-SWA app in Okta will get reflected in DataHub automatically. + +> #### A note on user deletion +>Note that when users are unassigned or deactivated in Okta, the corresponding users in DataHub are also deactivated (marked "suspended"). +But when a user is *deleted* in Okta, the corresponding user in DataHub does *not* get deleted. +Refer to the Okta documentation on [Delete (Deprovision)](https://developer.okta.com/docs/concepts/scim/#delete-deprovision) for more details. + +### 5. (Optional): Configure push groups +When groups are assigned to the app, Okta pushes the group-members as users to DataHub, but the group itself isn't pushed. +To push group information to DataHub, configure the `Push Groups` tab accordingly as shown below: + +

+ +

+ +Refer to the Okta [Group Push](https://help.okta.com/en-us/content/topics/users-groups-profiles/app-assignments-group-push.htm) documentation for more details. \ No newline at end of file diff --git a/docs/managed-datahub/subscription-and-notification.md b/docs/managed-datahub/subscription-and-notification.md index 5c2eaa17e5f2c9..c3c31d5fed7e61 100644 --- a/docs/managed-datahub/subscription-and-notification.md +++ b/docs/managed-datahub/subscription-and-notification.md @@ -27,6 +27,9 @@ If you want to create and manage group-level Subscriptions for your team, you wi - Manage Group Notification Settings - Manage Group Subscriptions +And to manage other user's subscriptions: +- Manage User Subscriptions + ## Using DataHub’s Subscriptions and Notifications Feature The first step is identifying the assets you want to subscribe to. diff --git a/docs/quick-ingestion-guides/bigquery/setup.md b/docs/quick-ingestion-guides/bigquery/setup.md index 10351d6572c531..96850f2deb68ed 100644 --- a/docs/quick-ingestion-guides/bigquery/setup.md +++ b/docs/quick-ingestion-guides/bigquery/setup.md @@ -38,7 +38,9 @@ Please refer to the BigQuery [Permissions](https://cloud.google.com/iam/docs/per You can always add/remove roles to Service Accounts later on. Please refer to the BigQuery [Manage access to projects, folders, and organizations](https://cloud.google.com/iam/docs/granting-changing-revoking-access) guide for more details. ::: -3. Create and download a [Service Account Key](https://cloud.google.com/iam/docs/creating-managing-service-account-keys). We will use this to set up authentication within DataHub. +3. To filter projects based on the `project_labels` configuration, first visit [cloudresourcemanager.googleapis.com](https://console.developers.google.com/apis/api/cloudresourcemanager.googleapis.com/overview) and enable the `Cloud Resource Manager API` + +4. 
Create and download a [Service Account Key](https://cloud.google.com/iam/docs/creating-managing-service-account-keys). We will use this to set up authentication within DataHub. The key file looks like this: diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/Edge.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/Edge.java index 3de09e599d99ed..8777be57e1bd8f 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/Edge.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/Edge.java @@ -13,6 +13,7 @@ import java.util.Map; import java.util.Optional; import java.util.stream.Collectors; +import javax.annotation.Nonnull; import lombok.AllArgsConstructor; import lombok.Data; import lombok.EqualsAndHashCode; @@ -59,7 +60,7 @@ public Edge( null); } - public String toDocId() { + public String toDocId(@Nonnull String idHashAlgo) { StringBuilder rawDocId = new StringBuilder(); rawDocId .append(getSource().toString()) @@ -72,9 +73,8 @@ public String toDocId() { } try { - String hashAlgo = System.getenv("ELASTIC_ID_HASH_ALGO"); byte[] bytesOfRawDocID = rawDocId.toString().getBytes(StandardCharsets.UTF_8); - MessageDigest md = MessageDigest.getInstance(hashAlgo); + MessageDigest md = MessageDigest.getInstance(idHashAlgo); byte[] thedigest = md.digest(bytesOfRawDocID); return Base64.getEncoder().encodeToString(thedigest); } catch (NoSuchAlgorithmException e) { diff --git a/metadata-ingestion-modules/airflow-plugin/setup.py b/metadata-ingestion-modules/airflow-plugin/setup.py index 2401b169cd6607..80d2efd3ed164e 100644 --- a/metadata-ingestion-modules/airflow-plugin/setup.py +++ b/metadata-ingestion-modules/airflow-plugin/setup.py @@ -16,7 +16,9 @@ def get_long_description(): _version: str = package_metadata["__version__"] _self_pin = ( - f"=={_version}" if not (_version.endswith("dev0") or "docker" in _version) else "" + f"=={_version}" + if not 
(_version.endswith(("dev0", "dev1")) or "docker" in _version) + else "" ) diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/custom_operator_dag.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/custom_operator_dag.py new file mode 100644 index 00000000000000..b31226b7b4ceeb --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/custom_operator_dag.py @@ -0,0 +1,74 @@ +import logging +from datetime import datetime, timedelta +from typing import Any, List, Tuple + +from airflow import DAG +from airflow.models.baseoperator import BaseOperator + +from datahub_airflow_plugin.entities import Dataset + +logger = logging.getLogger(__name__) + + +class CustomOperator(BaseOperator): + def __init__(self, name, **kwargs): + super().__init__(**kwargs) + self.name = name + + def execute(self, context): + """ + Other code.... + """ + logger.info("executing other code here") + + input_tables = ["mydb.schema.tableA", "mydb.schema.tableB"] + output_tables = ["mydb.schema.tableD"] + + inlets, outlets = self._get_sf_lineage(input_tables, output_tables) + + context["ti"].task.inlets = inlets + context["ti"].task.outlets = outlets + + @staticmethod + def _get_sf_lineage( + input_tables: List[str], output_tables: List[str] + ) -> Tuple[List[Any], List[Any]]: + """ + Get lineage tables from Snowflake. 
+ """ + inlets: List[Dataset] = [] + outlets: List[Dataset] = [] + + for table in input_tables: + inlets.append(Dataset(platform="snowflake", name=table)) + + for table in output_tables: + outlets.append(Dataset(platform="snowflake", name=table)) + + return inlets, outlets + + +default_args = { + "owner": "airflow", + "depends_on_past": False, + "start_date": datetime(2023, 1, 1), + "email": ["jdoe@example.com"], + "email_on_failure": False, + "execution_timeout": timedelta(minutes=5), +} + + +with DAG( + "custom_operator_dag", + default_args=default_args, + description="An example dag with custom operator", + schedule_interval=None, + tags=["example_tag"], + catchup=False, + default_view="tree", +) as dag: + custom_task = CustomOperator( + task_id="custom_task_id", + name="custom_name", + dag=dag, + ) diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_custom_operator_dag.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_custom_operator_dag.json new file mode 100644 index 00000000000000..b81466930ed41a --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_custom_operator_dag.json @@ -0,0 +1,365 @@ +[ +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,custom_operator_dag,prod)", + "changeType": "UPSERT", + "aspectName": "dataFlowInfo", + "aspect": { + "json": { + "customProperties": { + "_access_control": "None", + "catchup": "False", + "description": "'An example dag with custom operator'", + "doc_md": "None", + "fileloc": "", + "is_paused_upon_creation": "None", + "start_date": "None", + "tags": "['example_tag']", + "timezone": "Timezone('UTC')" + }, + "externalUrl": "http://airflow.example.com/tree?dag_id=custom_operator_dag", + "name": "custom_operator_dag", + "description": "An example dag with custom operator" + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,custom_operator_dag,prod)", + "changeType": 
"UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,custom_operator_dag,prod)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:example_tag" + } + ] + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,custom_operator_dag,prod)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:example_tag", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,custom_operator_dag,prod)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "custom_operator_dag" + } + ] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "['jdoe@example.com']", + "label": "'custom_task_id'", + "execution_timeout": "datetime.timedelta(seconds=300)", + "sla": "None", + "task_id": "'custom_task_id'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[]", + "outlets": "[]", + 
"openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/BaseFacet\", \"unknownItems\": [{\"name\": \"CustomOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"execution_timeout\": \"<>\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"is_setup\": false, \"is_teardown\": false, \"mapped\": false, \"operator_class\": \"custom_operator.CustomOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"custom_task_id\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=custom_operator_dag&_flt_3_task_id=custom_task_id", + "name": "custom_task_id", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [], + "outputDatasets": [], + "inputDatajobs": [], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07a4aaeffa3875a24cccd1fec6fc7c8c", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "run_id": "manual_run_test", + "duration": "", + "start_date": "", + "end_date": "", + "execution_date": "2023-09-27 21:34:38+00:00", + "try_number": "0", + "max_tries": "0", + "external_executor_id": "None", + "state": "running", + "operator": 
"CustomOperator", + "priority_weight": "1", + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=custom_task_id&dag_id=custom_operator_dag&map_index=-1", + "orchestrator": "airflow", + "dag_id": "custom_operator_dag", + "task_id": "custom_task_id" + }, + "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=custom_task_id&dag_id=custom_operator_dag&map_index=-1", + "name": "custom_operator_dag_custom_task_id_manual_run_test", + "type": "BATCH_AD_HOC", + "created": { + "time": 1722943444074, + "actor": "urn:li:corpuser:datahub" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07a4aaeffa3875a24cccd1fec6fc7c8c", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "upstreamInstances": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07a4aaeffa3875a24cccd1fec6fc7c8c", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1722943444074, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "status": "STARTED", + "attempt": 1 + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "changeType": "UPSERT", + 
"aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake", + "name": "mydb.schema.tableA", + "origin": "PROD" + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "['jdoe@example.com']", + "label": "'custom_task_id'", + "execution_timeout": "datetime.timedelta(seconds=300)", + "sla": "None", + "task_id": "'custom_task_id'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableB', env='PROD', platform_instance=None)]", + "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None)]", + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/BaseFacet\", \"unknownItems\": [{\"name\": \"CustomOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"execution_timeout\": \"<>\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"is_setup\": false, \"is_teardown\": false, \"mapped\": false, \"operator_class\": \"custom_operator.CustomOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"custom_task_id\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + }, + "externalUrl": 
"http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=custom_operator_dag&_flt_3_task_id=custom_task_id", + "name": "custom_task_id", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake", + "name": "mydb.schema.tableD", + "origin": "PROD" + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:example_tag" + } + ] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake", + "name": "mydb.schema.tableB", + "origin": "PROD" + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07a4aaeffa3875a24cccd1fec6fc7c8c", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1722943444263, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": 
"FULL_TABLE" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "airflow" + } + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_custom_operator_dag_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_custom_operator_dag_no_dag_listener.json new file mode 100644 index 00000000000000..019122600aedbc --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_custom_operator_dag_no_dag_listener.json @@ -0,0 +1,404 @@ +[ +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,custom_operator_dag,prod)", + "changeType": "UPSERT", + "aspectName": "dataFlowInfo", + "aspect": { + "json": { + "customProperties": { + "_access_control": "None", + "catchup": "False", + "description": "'An example dag with custom operator'", + "doc_md": "None", + "fileloc": "", + "is_paused_upon_creation": "None", + "start_date": "None", + "tags": "['example_tag']", + "timezone": "Timezone('UTC')" + }, + "externalUrl": "http://airflow.example.com/tree?dag_id=custom_operator_dag", + "name": "custom_operator_dag", + "description": "An example dag with custom operator" + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,custom_operator_dag,prod)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,custom_operator_dag,prod)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:example_tag" + } + ] + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": 
"urn:li:dataFlow:(airflow,custom_operator_dag,prod)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:example_tag", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,custom_operator_dag,prod)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "custom_operator_dag" + } + ] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "['jdoe@example.com']", + "label": "'custom_task_id'", + "execution_timeout": "datetime.timedelta(seconds=300)", + "sla": "None", + "task_id": "'custom_task_id'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[]", + "outlets": "[]", + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/BaseFacet\", \"unknownItems\": [{\"name\": \"CustomOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"execution_timeout\": \"<>\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"mapped\": false, \"operator_class\": \"custom_operator.CustomOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": 
\"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"custom_task_id\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=custom_operator_dag&_flt_3_task_id=custom_task_id", + "name": "custom_task_id", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [], + "outputDatasets": [], + "inputDatajobs": [], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:example_tag" + } + ] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07a4aaeffa3875a24cccd1fec6fc7c8c", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "run_id": "manual_run_test", + "duration": "", + "start_date": "", + "end_date": "", + "execution_date": "2023-09-27 21:34:38+00:00", + "try_number": "0", + "max_tries": "0", + "external_executor_id": "None", + 
"state": "running", + "operator": "CustomOperator", + "priority_weight": "1", + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=custom_task_id&dag_id=custom_operator_dag&map_index=-1", + "orchestrator": "airflow", + "dag_id": "custom_operator_dag", + "task_id": "custom_task_id" + }, + "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=custom_task_id&dag_id=custom_operator_dag&map_index=-1", + "name": "custom_operator_dag_custom_task_id_manual_run_test", + "type": "BATCH_AD_HOC", + "created": { + "time": 1723716446564, + "actor": "urn:li:corpuser:datahub" + } + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07a4aaeffa3875a24cccd1fec6fc7c8c", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "upstreamInstances": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07a4aaeffa3875a24cccd1fec6fc7c8c", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1723716446564, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "status": "STARTED", + "attempt": 1 + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "['jdoe@example.com']", + "label": "'custom_task_id'", + "execution_timeout": "datetime.timedelta(seconds=300)", + "sla": "None", + "task_id": "'custom_task_id'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": 
"[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableB', env='PROD', platform_instance=None)]", + "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None)]", + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/BaseFacet\", \"unknownItems\": [{\"name\": \"CustomOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"execution_timeout\": \"<>\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"mapped\": false, \"operator_class\": \"custom_operator.CustomOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"custom_task_id\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=custom_operator_dag&_flt_3_task_id=custom_task_id", + "name": "custom_task_id", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake", + "name": "mydb.schema.tableA", + "origin": "PROD" + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake", + "name": "mydb.schema.tableB", + "origin": "PROD" + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake", + "name": "mydb.schema.tableD", + "origin": "PROD" + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,custom_operator_dag,prod),custom_task_id)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:example_tag" + } + ] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07a4aaeffa3875a24cccd1fec6fc7c8c", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1723716446701, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "status": "COMPLETE", + "result": { + 
"type": "SUCCESS", + "nativeResultType": "airflow" + } + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py index 9ea822edeef81f..2b8d4c47f62246 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py @@ -110,7 +110,9 @@ def _wait_for_dag_finish( @contextlib.contextmanager def _run_airflow( - tmp_path: pathlib.Path, dags_folder: pathlib.Path, is_v1: bool + tmp_path: pathlib.Path, + dags_folder: pathlib.Path, + is_v1: bool, ) -> Iterator[AirflowInstance]: airflow_home = tmp_path / "airflow_home" print(f"Using airflow home: {airflow_home}") @@ -272,6 +274,7 @@ class DagTestCase: DagTestCase("basic_iolets"), DagTestCase("snowflake_operator", success=False, v2_only=True), DagTestCase("sqlite_operator", v2_only=True), + DagTestCase("custom_operator_dag", v2_only=True), ] diff --git a/metadata-ingestion-modules/dagster-plugin/build.gradle b/metadata-ingestion-modules/dagster-plugin/build.gradle index 6cb7b9295549a7..74ca7cedea3a52 100644 --- a/metadata-ingestion-modules/dagster-plugin/build.gradle +++ b/metadata-ingestion-modules/dagster-plugin/build.gradle @@ -33,7 +33,7 @@ task installPackage(type: Exec, dependsOn: [environmentSetup, ':metadata-ingesti outputs.file(sentinel_file) commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + - "uv pip install -e . ${extra_pip_requirements} && " + + "${pip_install_command} -e . 
${extra_pip_requirements} && " + "touch ${sentinel_file}" } @@ -45,15 +45,11 @@ task installDev(type: Exec, dependsOn: [install]) { outputs.file(sentinel_file) commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + - "uv pip install -e .[dev] ${extra_pip_requirements} && " + + "${pip_install_command} -e .[dev] ${extra_pip_requirements} && " + "touch ${sentinel_file}" } task lint(type: Exec, dependsOn: installDev) { - /* - The find/sed combo below is a temporary work-around for the following mypy issue with airflow 2.2.0: - "venv/lib/python3.8/site-packages/airflow/_vendor/connexion/spec.py:169: error: invalid syntax". - */ commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + "black --check --diff src/ tests/ examples/ && " + @@ -77,7 +73,7 @@ task installDevTest(type: Exec, dependsOn: [installDev]) { outputs.file(sentinel_file) commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + - "uv pip install -e .[dev,integration-tests] ${extra_pip_requirements} && " + + "${pip_install_command} -e .[dev,integration-tests] ${extra_pip_requirements} && " + "touch ${sentinel_file}" } @@ -105,10 +101,6 @@ task testQuick(type: Exec, dependsOn: installDevTest) { } -task testFull(type: Exec, dependsOn: [testQuick, installDevTest]) { - commandLine 'bash', '-x', '-c', - "source ${venv_name}/bin/activate && pytest -m 'not slow_integration' -vv --continue-on-collection-errors --junit-xml=junit.full.xml" -} task buildWheel(type: Exec, dependsOn: [environmentSetup]) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && " + 'uv pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_INSTALL=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh' diff --git a/metadata-ingestion-modules/dagster-plugin/setup.py b/metadata-ingestion-modules/dagster-plugin/setup.py index 8a2a1d76d345bf..bf9fcf09a66bc1 100644 --- a/metadata-ingestion-modules/dagster-plugin/setup.py +++ 
b/metadata-ingestion-modules/dagster-plugin/setup.py @@ -17,7 +17,9 @@ def get_long_description(): _version: str = package_metadata["__version__"] _self_pin = ( - f"=={_version}" if not (_version.endswith("dev0") or "docker" in _version) else "" + f"=={_version}" + if not (_version.endswith(("dev0", "dev1")) or "docker" in _version) + else "" ) base_requirements = { @@ -25,9 +27,7 @@ def get_long_description(): "dagster >= 1.3.3", "dagit >= 1.3.3", *rest_common, - # Ignoring the dependency below because it causes issues with the vercel built wheel install - # f"acryl-datahub[datahub-rest]{_self_pin}", - "acryl-datahub[datahub-rest]", + f"acryl-datahub[datahub-rest]{_self_pin}", } mypy_stubs = { diff --git a/metadata-ingestion-modules/gx-plugin/.gitignore b/metadata-ingestion-modules/gx-plugin/.gitignore new file mode 100644 index 00000000000000..8c01744589e35e --- /dev/null +++ b/metadata-ingestion-modules/gx-plugin/.gitignore @@ -0,0 +1,143 @@ +.envrc +src/datahub_gx_plugin/__init__.py.bak +.vscode/ +output +pvenv36/ +bq_credentials.json +/tmp +*.bak + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Generated classes +src/datahub/metadata/ +wheels/ +junit.quick.xml diff --git a/metadata-ingestion-modules/gx-plugin/README.md b/metadata-ingestion-modules/gx-plugin/README.md new file mode 100644 index 00000000000000..1ffd87a955432d --- /dev/null +++ b/metadata-ingestion-modules/gx-plugin/README.md @@ -0,0 +1,4 @@ +# Datahub GX Plugin + +See the DataHub GX docs for details. 
+ diff --git a/metadata-ingestion-modules/gx-plugin/build.gradle b/metadata-ingestion-modules/gx-plugin/build.gradle new file mode 100644 index 00000000000000..f1adbc6676e5bc --- /dev/null +++ b/metadata-ingestion-modules/gx-plugin/build.gradle @@ -0,0 +1,123 @@ +plugins { + id 'base' +} + +ext { + python_executable = 'python3' + venv_name = 'venv' +} + +if (!project.hasProperty("extra_pip_requirements")) { + ext.extra_pip_requirements = "" +} + +def pip_install_command = "VIRTUAL_ENV=${venv_name} ${venv_name}/bin/uv pip install -e ../../metadata-ingestion" + +task checkPythonVersion(type: Exec) { + commandLine python_executable, '-c', 'import sys; assert sys.version_info >= (3, 8)' +} + +task environmentSetup(type: Exec, dependsOn: checkPythonVersion) { + def sentinel_file = "${venv_name}/.venv_environment_sentinel" + inputs.file file('setup.py') + outputs.file(sentinel_file) + commandLine 'bash', '-c', + "${python_executable} -m venv ${venv_name} && " + + "${venv_name}/bin/python -m pip install --upgrade pip uv wheel 'setuptools>=63.0.0' && " + + "touch ${sentinel_file}" +} + +task installPackage(type: Exec, dependsOn: [environmentSetup, ':metadata-ingestion:codegen']) { + def sentinel_file = "${venv_name}/.build_install_package_sentinel" + inputs.file file('setup.py') + outputs.file(sentinel_file) + commandLine 'bash', '-c', + "source ${venv_name}/bin/activate && set -x && " + + "${pip_install_command} -e . 
${extra_pip_requirements} && " + + "touch ${sentinel_file}" +} + +task install(dependsOn: [installPackage]) + +task installDev(type: Exec, dependsOn: [install]) { + def sentinel_file = "${venv_name}/.build_install_dev_sentinel" + inputs.file file('setup.py') + outputs.file(sentinel_file) + commandLine 'bash', '-c', + "source ${venv_name}/bin/activate && set -x && " + + "${pip_install_command} -e .[dev] ${extra_pip_requirements} && " + + "touch ${sentinel_file}" +} + +task lint(type: Exec, dependsOn: installDev) { + commandLine 'bash', '-c', + "source ${venv_name}/bin/activate && set -x && " + + "black --check --diff src/ tests/ && " + + "isort --check --diff src/ tests/ && " + + "flake8 --count --statistics src/ tests/ && " + + "mypy --show-traceback --show-error-codes src/ tests/" +} +task lintFix(type: Exec, dependsOn: installDev) { + commandLine 'bash', '-x', '-c', + "source ${venv_name}/bin/activate && " + + "black src/ tests/ && " + + "isort src/ tests/ && " + + "flake8 src/ tests/ && " + + "mypy src/ tests/" +} + +task installDevTest(type: Exec, dependsOn: [installDev]) { + def sentinel_file = "${venv_name}/.build_install_dev_test_sentinel" + inputs.file file('setup.py') + outputs.dir("${venv_name}") + outputs.file(sentinel_file) + commandLine 'bash', '-c', + "source ${venv_name}/bin/activate && set -x && " + + "${pip_install_command} -e .[dev,integration-tests] ${extra_pip_requirements} && " + + "touch ${sentinel_file}" +} + +def testFile = hasProperty('testFile') ? testFile : 'unknown' +task testSingle(dependsOn: [installDevTest]) { + doLast { + if (testFile != 'unknown') { + exec { + commandLine 'bash', '-x', '-c', + "source ${venv_name}/bin/activate && pytest ${testFile}" + } + } else { + throw new GradleException("No file provided. Use -PtestFile=") + } + } +} + +task testQuick(type: Exec, dependsOn: installDevTest) { + // We can't enforce the coverage requirements if we run a subset of the tests. 
+ inputs.files(project.fileTree(dir: "src/", include: "**/*.py")) + inputs.files(project.fileTree(dir: "tests/")) + outputs.dir("${venv_name}") + commandLine 'bash', '-x', '-c', + "source ${venv_name}/bin/activate && pytest -vv --continue-on-collection-errors --junit-xml=junit.quick.xml" +} + + +task buildWheel(type: Exec, dependsOn: [environmentSetup]) { + commandLine 'bash', '-c', "source ${venv_name}/bin/activate && " + + 'uv pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_INSTALL=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh' +} + +task cleanPythonCache(type: Exec) { + commandLine 'bash', '-c', + "find src -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete -o -type d -empty -delete" +} + +build.dependsOn install +check.dependsOn lint +check.dependsOn testQuick + +clean { + delete venv_name + delete 'build' + delete 'dist' +} +clean.dependsOn cleanPythonCache diff --git a/metadata-ingestion-modules/gx-plugin/pyproject.toml b/metadata-ingestion-modules/gx-plugin/pyproject.toml new file mode 100644 index 00000000000000..fba81486b9f677 --- /dev/null +++ b/metadata-ingestion-modules/gx-plugin/pyproject.toml @@ -0,0 +1,19 @@ +[build-system] +build-backend = "setuptools.build_meta" +requires = ["setuptools>=54.0.0", "wheel", "pip>=21.0.0"] + +[tool.black] +extend-exclude = ''' +# A regex preceded with ^/ will apply only to files and directories +# in the root of the project. +^/tmp +''' +include = '\.pyi?$' + +[tool.isort] +indent = ' ' +profile = 'black' +sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER' + +[tool.pyright] +extraPaths = ['tests'] \ No newline at end of file diff --git a/metadata-ingestion-modules/gx-plugin/scripts/release.sh b/metadata-ingestion-modules/gx-plugin/scripts/release.sh new file mode 100755 index 00000000000000..058add495821cb --- /dev/null +++ b/metadata-ingestion-modules/gx-plugin/scripts/release.sh @@ -0,0 +1,26 @@ +#!/bin/bash +set -euxo pipefail + +if [[ ! 
${RELEASE_SKIP_TEST:-} ]] && [[ ! ${RELEASE_SKIP_INSTALL:-} ]]; then + ../../gradlew build # also runs tests +elif [[ ! ${RELEASE_SKIP_INSTALL:-} ]]; then + ../../gradlew install +fi + +MODULE=datahub_gx_plugin + +# Check packaging constraint. +python -c 'import setuptools; where="./src"; assert setuptools.find_packages(where) == setuptools.find_namespace_packages(where), "you seem to be missing or have extra __init__.py files"' +if [[ ${RELEASE_VERSION:-} ]]; then + # Replace version with RELEASE_VERSION env variable + sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" src/${MODULE}/__init__.py +else + vim src/${MODULE}/__init__.py +fi + +rm -rf build dist || true +python -m build +if [[ ! ${RELEASE_SKIP_UPLOAD:-} ]]; then + python -m twine upload 'dist/*' +fi +mv src/${MODULE}/__init__.py.bak src/${MODULE}/__init__.py diff --git a/metadata-ingestion-modules/gx-plugin/setup.cfg b/metadata-ingestion-modules/gx-plugin/setup.cfg new file mode 100644 index 00000000000000..bbdd85f0fdc4ed --- /dev/null +++ b/metadata-ingestion-modules/gx-plugin/setup.cfg @@ -0,0 +1,71 @@ +[flake8] +max-complexity = 15 +ignore = + # Ignore: line length issues, since black's formatter will take care of them. + E501, + # Ignore: 1 blank line required before class docstring. + D203, + # See https://stackoverflow.com/a/57074416. + W503, + # See https://github.com/psf/black/issues/315. 
+ E203 +exclude = + .git, + venv, + .tox, + __pycache__ +per-file-ignores = + # imported but unused + __init__.py: F401 +ban-relative-imports = true + +[mypy] +plugins = + pydantic.mypy +exclude = ^(venv|build|dist)/ +ignore_missing_imports = yes +strict_optional = yes +check_untyped_defs = yes +disallow_incomplete_defs = yes +disallow_untyped_decorators = yes +warn_unused_configs = yes +# eventually we'd like to enable these +disallow_untyped_defs = no + +# try to be a bit more strict in certain areas of the codebase +[mypy-datahub.*] +ignore_missing_imports = no +[mypy-tests.*] +ignore_missing_imports = no + +[tool:pytest] +asyncio_mode = auto +addopts = --cov=src --cov-report term-missing --cov-config setup.cfg --strict-markers +markers = + integration: marks all integration tests, across all batches (deselect with '-m "not integration"') +testpaths = + tests/unit + tests/integration + +[coverage:run] +# Because of some quirks in the way setup.cfg, coverage.py, pytest-cov, +# and tox interact, we should not uncomment the following line. +# See https://pytest-cov.readthedocs.io/en/latest/config.html and +# https://coverage.readthedocs.io/en/coverage-5.0/config.html. +# We also have some additional pytest/cov config options in tox.ini. +# source = src + +[coverage:paths] +# This is necessary for tox-based coverage to be counted properly. +source = + src + */site-packages + +[coverage:report] +# The fail_under value ensures that at least some coverage data is collected. +# We override its value in the tox config. 
+show_missing = true +exclude_lines = + pragma: no cover + @abstract + if TYPE_CHECKING: diff --git a/metadata-ingestion-modules/gx-plugin/setup.py b/metadata-ingestion-modules/gx-plugin/setup.py new file mode 100644 index 00000000000000..1584111f820f59 --- /dev/null +++ b/metadata-ingestion-modules/gx-plugin/setup.py @@ -0,0 +1,157 @@ +import os +import pathlib + +import setuptools + +package_metadata: dict = {} +with open("./src/datahub_gx_plugin/__init__.py") as fp: + exec(fp.read(), package_metadata) + + +def get_long_description(): + root = os.path.dirname(__file__) + return pathlib.Path(os.path.join(root, "README.md")).read_text() + + +rest_common = {"requests", "requests_file"} + +# TODO: Can we move away from sqllineage and use sqlglot ?? +sqllineage_lib = { + "sqllineage==1.3.8", + # We don't have a direct dependency on sqlparse but it is a dependency of sqllineage. + # There have previously been issues from not pinning sqlparse, so it's best to pin it. + # Related: https://github.com/reata/sqllineage/issues/361 and https://github.com/reata/sqllineage/pull/360 + "sqlparse==0.4.4", +} + +_version: str = package_metadata["__version__"] +_self_pin = ( + f"=={_version}" + if not (_version.endswith(("dev0", "dev1")) or "docker" in _version) + else "" +) + +base_requirements = { + # Actual dependencies. + # This is temporary lower bound that we're open to loosening/tightening as requirements show up + "sqlalchemy>=1.4.39, <2", + # GE added handling for higher version of jinja2 in version 0.15.12 + # https://github.com/great-expectations/great_expectations/pull/5382/files + # TODO: support GX 0.18.0 + "great-expectations>=0.15.12, <0.18.0", + # datahub does not depend on traitlets directly but great expectations does. 
+ # https://github.com/ipython/traitlets/issues/741 + "traitlets<5.2.2", + *rest_common, + *sqllineage_lib, + f"acryl-datahub[datahub-rest]{_self_pin}", +} + +mypy_stubs = { + "types-dataclasses", + "sqlalchemy-stubs", + "types-setuptools", + "types-six", + "types-python-dateutil", + "types-requests", + "types-toml", + "types-PyYAML", + "types-freezegun", + "types-cachetools", + # versions 0.1.13 and 0.1.14 seem to have issues + "types-click==0.1.12", + "types-tabulate", + # avrogen package requires this + "types-pytz", +} + +base_dev_requirements = { + *base_requirements, + *mypy_stubs, + "black==22.12.0", + "coverage>=5.1", + "flake8>=6.0.0", + "flake8-tidy-imports>=4.3.0", + "flake8-bugbear==23.3.12", + "isort>=5.7.0", + "mypy>=1.4.0", + # pydantic 1.8.2 is incompatible with mypy 0.910. + # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910. + "pydantic>=1.10.0,!=1.10.3", + "pytest>=6.2.2", + "pytest-asyncio>=0.16.0", + "pytest-cov>=2.8.1", + "tox", + "deepdiff", + "requests-mock", + "freezegun", + "jsonpickle", + "build", + "twine", + "packaging", +} + +dev_requirements = { + *base_dev_requirements, +} + +integration_test_requirements = { + *dev_requirements, + "psycopg2-binary", + "pyspark", + f"acryl-datahub[testing-utils]{_self_pin}", + "pytest-docker>=1.1.0", +} + +entry_points = { + "gx.plugins": "acryl-datahub-gx-plugin = datahub_gx_plugin.action:DataHubValidationAction" +} + + +setuptools.setup( + # Package metadata. 
+ name=package_metadata["__package_name__"], + version=package_metadata["__version__"], + url="https://datahubproject.io/", + project_urls={ + "Documentation": "https://datahubproject.io/docs/", + "Source": "https://github.com/datahub-project/datahub", + "Changelog": "https://github.com/datahub-project/datahub/releases", + }, + license="Apache License 2.0", + description="Datahub GX plugin to capture executions and send to Datahub", + long_description=get_long_description(), + long_description_content_type="text/markdown", + classifiers=[ + "Development Status :: 5 - Production/Stable", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Intended Audience :: Developers", + "Intended Audience :: Information Technology", + "Intended Audience :: System Administrators", + "License :: OSI Approved", + "License :: OSI Approved :: Apache Software License", + "Operating System :: Unix", + "Operating System :: POSIX :: Linux", + "Environment :: Console", + "Environment :: MacOS X", + "Topic :: Software Development", + ], + # Package info. + zip_safe=False, + python_requires=">=3.8", + package_dir={"": "src"}, + packages=setuptools.find_namespace_packages(where="./src"), + entry_points=entry_points, + # Dependencies. + install_requires=list(base_requirements), + extras_require={ + "ignore": [], # This is a dummy extra to allow for trailing commas in the list. 
+ "dev": list(dev_requirements), + "integration-tests": list(integration_test_requirements), + }, +) diff --git a/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/__init__.py b/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/__init__.py new file mode 100644 index 00000000000000..a7689be82a5d99 --- /dev/null +++ b/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/__init__.py @@ -0,0 +1,21 @@ +# Published at https://pypi.org/project/acryl-datahub/. +__package_name__ = "acryl-datahub-gx-plugin" +__version__ = "1!0.0.0.dev0" + + +def is_dev_mode() -> bool: + return __version__.endswith("dev0") + + +def nice_version_name() -> str: + if is_dev_mode(): + return "unavailable (installed in develop mode)" + return __version__ + + +def get_provider_info(): + return { + "package-name": f"{__package_name__}", + "name": f"{__package_name__}", + "description": "Datahub metadata collector plugin", + } diff --git a/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/action.py b/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/action.py new file mode 100644 index 00000000000000..76e43cf8c2c3db --- /dev/null +++ b/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/action.py @@ -0,0 +1,871 @@ +import json +import logging +import sys +import time +from dataclasses import dataclass +from datetime import timezone +from decimal import Decimal +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union + +import datahub.emitter.mce_builder as builder +from datahub.cli.env_utils import get_boolean_env_variable +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.emitter.rest_emitter import DatahubRestEmitter +from datahub.emitter.serialization_helper import pre_json_transform +from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import ( + get_platform_from_sqlalchemy_uri, +) +from datahub.metadata.com.linkedin.pegasus2avro.assertion import ( + AssertionInfo, + AssertionResult, + AssertionResultType, + 
AssertionRunEvent, + AssertionRunStatus, + AssertionStdAggregation, + AssertionStdOperator, + AssertionStdParameter, + AssertionStdParameters, + AssertionStdParameterType, + AssertionType, + BatchSpec, + DatasetAssertionInfo, + DatasetAssertionScope, +) +from datahub.metadata.com.linkedin.pegasus2avro.common import DataPlatformInstance +from datahub.metadata.schema_classes import PartitionSpecClass, PartitionTypeClass +from datahub.utilities._markupsafe_compat import MARKUPSAFE_PATCHED +from datahub.utilities.sql_parser import DefaultSQLParser +from great_expectations.checkpoint.actions import ValidationAction +from great_expectations.core.batch import Batch +from great_expectations.core.batch_spec import ( + RuntimeDataBatchSpec, + RuntimeQueryBatchSpec, + SqlAlchemyDatasourceBatchSpec, +) +from great_expectations.core.expectation_validation_result import ( + ExpectationSuiteValidationResult, +) +from great_expectations.data_asset.data_asset import DataAsset +from great_expectations.data_context import AbstractDataContext +from great_expectations.data_context.types.resource_identifiers import ( + ExpectationSuiteIdentifier, + ValidationResultIdentifier, +) +from great_expectations.execution_engine import PandasExecutionEngine +from great_expectations.execution_engine.sqlalchemy_execution_engine import ( + SqlAlchemyExecutionEngine, +) +from great_expectations.validator.validator import Validator +from sqlalchemy.engine.base import Connection, Engine +from sqlalchemy.engine.url import make_url + +if TYPE_CHECKING: + from great_expectations.data_context.types.resource_identifiers import ( + GXCloudIdentifier, + ) + +assert MARKUPSAFE_PATCHED +logger = logging.getLogger(__name__) +if get_boolean_env_variable("DATAHUB_DEBUG", False): + handler = logging.StreamHandler(stream=sys.stdout) + logger.addHandler(handler) + logger.setLevel(logging.DEBUG) + +GE_PLATFORM_NAME = "great-expectations" + + +class DataHubValidationAction(ValidationAction): + def __init__( + self, + 
data_context: AbstractDataContext, + server_url: str, + env: str = builder.DEFAULT_ENV, + platform_alias: Optional[str] = None, + platform_instance_map: Optional[Dict[str, str]] = None, + graceful_exceptions: bool = True, + token: Optional[str] = None, + timeout_sec: Optional[float] = None, + retry_status_codes: Optional[List[int]] = None, + retry_max_times: Optional[int] = None, + extra_headers: Optional[Dict[str, str]] = None, + exclude_dbname: Optional[bool] = None, + parse_table_names_from_sql: bool = False, + convert_urns_to_lowercase: bool = False, + name: str = "DataHubValidationAction", + ): + + super().__init__(data_context) + self.server_url = server_url + self.env = env + self.platform_alias = platform_alias + self.platform_instance_map = platform_instance_map + self.graceful_exceptions = graceful_exceptions + self.token = token + self.timeout_sec = timeout_sec + self.retry_status_codes = retry_status_codes + self.retry_max_times = retry_max_times + self.extra_headers = extra_headers + self.exclude_dbname = exclude_dbname + self.parse_table_names_from_sql = parse_table_names_from_sql + self.convert_urns_to_lowercase = convert_urns_to_lowercase + + def _run( + self, + validation_result_suite: ExpectationSuiteValidationResult, + validation_result_suite_identifier: Union[ + ValidationResultIdentifier, "GXCloudIdentifier" + ], + data_asset: Union[Validator, DataAsset, Batch], + payload: Optional[Any] = None, + expectation_suite_identifier: Optional[ExpectationSuiteIdentifier] = None, + checkpoint_identifier: Optional[Any] = None, + ) -> Dict: + datasets = [] + try: + emitter = DatahubRestEmitter( + gms_server=self.server_url, + token=self.token, + read_timeout_sec=self.timeout_sec, + connect_timeout_sec=self.timeout_sec, + retry_status_codes=self.retry_status_codes, + retry_max_times=self.retry_max_times, + extra_headers=self.extra_headers, + ) + + expectation_suite_name = validation_result_suite.meta.get( + "expectation_suite_name" + ) + run_id = 
validation_result_suite.meta.get("run_id") + if hasattr(data_asset, "active_batch_id"): + batch_identifier = data_asset.active_batch_id + else: + batch_identifier = data_asset.batch_id + + if isinstance( + validation_result_suite_identifier, ValidationResultIdentifier + ): + expectation_suite_name = ( + validation_result_suite_identifier.expectation_suite_identifier.expectation_suite_name + ) + run_id = validation_result_suite_identifier.run_id + batch_identifier = validation_result_suite_identifier.batch_identifier + + # Returns datasets and corresponding batch requests + datasets = self.get_dataset_partitions(batch_identifier, data_asset) + + if len(datasets) == 0 or datasets[0]["dataset_urn"] is None: + warn("Metadata not sent to datahub. No datasets found.") + return {"datahub_notification_result": "none required"} + + # Returns assertion info and assertion results + assertions = self.get_assertions_with_results( + validation_result_suite, + expectation_suite_name, + run_id, + payload, + datasets, + ) + + logger.info("Sending metadata to datahub ...") + logger.info("Dataset URN - {urn}".format(urn=datasets[0]["dataset_urn"])) + + for assertion in assertions: + logger.info( + "Assertion URN - {urn}".format(urn=assertion["assertionUrn"]) + ) + + # Construct a MetadataChangeProposalWrapper object. + assertion_info_mcp = MetadataChangeProposalWrapper( + entityUrn=assertion["assertionUrn"], + aspect=assertion["assertionInfo"], + ) + emitter.emit_mcp(assertion_info_mcp) + + # Construct a MetadataChangeProposalWrapper object. + assertion_platform_mcp = MetadataChangeProposalWrapper( + entityUrn=assertion["assertionUrn"], + aspect=assertion["assertionPlatform"], + ) + emitter.emit_mcp(assertion_platform_mcp) + + for assertionResult in assertion["assertionResults"]: + dataset_assertionResult_mcp = MetadataChangeProposalWrapper( + entityUrn=assertionResult.assertionUrn, + aspect=assertionResult, + ) + + # Emit Result! 
(timeseries aspect) + emitter.emit_mcp(dataset_assertionResult_mcp) + logger.info("Metadata sent to datahub.") + result = "DataHub notification succeeded" + except Exception as e: + result = "DataHub notification failed" + if self.graceful_exceptions: + logger.error(e) + logger.info("Suppressing error because graceful_exceptions is set") + else: + raise + + return {"datahub_notification_result": result} + + def get_assertions_with_results( + self, + validation_result_suite, + expectation_suite_name, + run_id, + payload, + datasets, + ): + dataPlatformInstance = DataPlatformInstance( + platform=builder.make_data_platform_urn(GE_PLATFORM_NAME) + ) + docs_link = None + if payload: + # process the payload + for action_names in payload.keys(): + if payload[action_names]["class"] == "UpdateDataDocsAction": + data_docs_pages = payload[action_names] + for docs_link_key, docs_link_val in data_docs_pages.items(): + if "file://" not in docs_link_val and docs_link_key != "class": + docs_link = docs_link_val + + assertions_with_results = [] + for result in validation_result_suite.results: + expectation_config = result["expectation_config"] + expectation_type = expectation_config["expectation_type"] + success = bool(result["success"]) + kwargs = { + k: v for k, v in expectation_config["kwargs"].items() if k != "batch_id" + } + + result = result["result"] + assertion_datasets = [d["dataset_urn"] for d in datasets] + if len(datasets) == 1 and "column" in kwargs: + assertion_fields = [ + builder.make_schema_field_urn( + datasets[0]["dataset_urn"], kwargs["column"] + ) + ] + else: + assertion_fields = None # type:ignore + + # Be careful what fields to consider for creating assertion urn. 
+ # Any change in fields below would lead to a new assertion + # FIXME - Currently, when using evaluation parameters, new assertion is + # created when runtime resolved kwargs are different, + # possibly for each validation run + assertionUrn = builder.make_assertion_urn( + builder.datahub_guid( + pre_json_transform( + { + "platform": GE_PLATFORM_NAME, + "nativeType": expectation_type, + "nativeParameters": kwargs, + "dataset": assertion_datasets[0], + "fields": assertion_fields, + } + ) + ) + ) + logger.debug( + "GE expectation_suite_name - {name}, expectation_type - {type}, Assertion URN - {urn}".format( + name=expectation_suite_name, type=expectation_type, urn=assertionUrn + ) + ) + assertionInfo: AssertionInfo = self.get_assertion_info( + expectation_type, + kwargs, + assertion_datasets[0], + assertion_fields, + expectation_suite_name, + ) + + # TODO: Understand why their run time is incorrect. + run_time = run_id.run_time.astimezone(timezone.utc) + evaluation_parameters = ( + { + k: convert_to_string(v) + for k, v in validation_result_suite.evaluation_parameters.items() + if k and v + } + if validation_result_suite.evaluation_parameters + else None + ) + + nativeResults = { + k: convert_to_string(v) + for k, v in result.items() + if ( + k + in [ + "observed_value", + "partial_unexpected_list", + "partial_unexpected_counts", + "details", + ] + and v + ) + } + + actualAggValue = ( + result.get("observed_value") + if isinstance(result.get("observed_value"), (int, float)) + else None + ) + + ds = datasets[0] + # https://docs.greatexpectations.io/docs/reference/expectations/result_format/ + assertionResult = AssertionRunEvent( + timestampMillis=int(round(time.time() * 1000)), + assertionUrn=assertionUrn, + asserteeUrn=ds["dataset_urn"], + runId=run_time.strftime("%Y-%m-%dT%H:%M:%SZ"), + result=AssertionResult( + type=( + AssertionResultType.SUCCESS + if success + else AssertionResultType.FAILURE + ), + rowCount=parse_int_or_default(result.get("element_count")), + 
missingCount=parse_int_or_default(result.get("missing_count")), + unexpectedCount=parse_int_or_default( + result.get("unexpected_count") + ), + actualAggValue=actualAggValue, + externalUrl=docs_link, + nativeResults=nativeResults, + ), + batchSpec=ds["batchSpec"], + status=AssertionRunStatus.COMPLETE, + runtimeContext=evaluation_parameters, + ) + if ds.get("partitionSpec") is not None: + assertionResult.partitionSpec = ds.get("partitionSpec") + assertionResults = [assertionResult] + assertions_with_results.append( + { + "assertionUrn": assertionUrn, + "assertionInfo": assertionInfo, + "assertionPlatform": dataPlatformInstance, + "assertionResults": assertionResults, + } + ) + return assertions_with_results + + def get_assertion_info( + self, expectation_type, kwargs, dataset, fields, expectation_suite_name + ): + # TODO - can we find exact type of min and max value + def get_min_max(kwargs, type=AssertionStdParameterType.UNKNOWN): + return AssertionStdParameters( + minValue=AssertionStdParameter( + value=convert_to_string(kwargs.get("min_value")), + type=type, + ), + maxValue=AssertionStdParameter( + value=convert_to_string(kwargs.get("max_value")), + type=type, + ), + ) + + known_expectations: Dict[str, DataHubStdAssertion] = { + # column aggregate expectations + "expect_column_min_to_be_between": DataHubStdAssertion( + scope=DatasetAssertionScope.DATASET_COLUMN, + operator=AssertionStdOperator.BETWEEN, + aggregation=AssertionStdAggregation.MIN, + parameters=get_min_max(kwargs), + ), + "expect_column_max_to_be_between": DataHubStdAssertion( + scope=DatasetAssertionScope.DATASET_COLUMN, + operator=AssertionStdOperator.BETWEEN, + aggregation=AssertionStdAggregation.MAX, + parameters=get_min_max(kwargs), + ), + "expect_column_median_to_be_between": DataHubStdAssertion( + scope=DatasetAssertionScope.DATASET_COLUMN, + operator=AssertionStdOperator.BETWEEN, + aggregation=AssertionStdAggregation.MEDIAN, + parameters=get_min_max(kwargs), + ), + 
"expect_column_stdev_to_be_between": DataHubStdAssertion( + scope=DatasetAssertionScope.DATASET_COLUMN, + operator=AssertionStdOperator.BETWEEN, + aggregation=AssertionStdAggregation.STDDEV, + parameters=get_min_max(kwargs, AssertionStdParameterType.NUMBER), + ), + "expect_column_mean_to_be_between": DataHubStdAssertion( + scope=DatasetAssertionScope.DATASET_COLUMN, + operator=AssertionStdOperator.BETWEEN, + aggregation=AssertionStdAggregation.MEAN, + parameters=get_min_max(kwargs, AssertionStdParameterType.NUMBER), + ), + "expect_column_unique_value_count_to_be_between": DataHubStdAssertion( + scope=DatasetAssertionScope.DATASET_COLUMN, + operator=AssertionStdOperator.BETWEEN, + aggregation=AssertionStdAggregation.UNIQUE_COUNT, + parameters=get_min_max(kwargs, AssertionStdParameterType.NUMBER), + ), + "expect_column_proportion_of_unique_values_to_be_between": DataHubStdAssertion( + scope=DatasetAssertionScope.DATASET_COLUMN, + operator=AssertionStdOperator.BETWEEN, + aggregation=AssertionStdAggregation.UNIQUE_PROPOTION, + parameters=get_min_max(kwargs, AssertionStdParameterType.NUMBER), + ), + "expect_column_sum_to_be_between": DataHubStdAssertion( + scope=DatasetAssertionScope.DATASET_COLUMN, + operator=AssertionStdOperator.BETWEEN, + aggregation=AssertionStdAggregation.SUM, + parameters=get_min_max(kwargs, AssertionStdParameterType.NUMBER), + ), + "expect_column_quantile_values_to_be_between": DataHubStdAssertion( + scope=DatasetAssertionScope.DATASET_COLUMN, + operator=AssertionStdOperator.BETWEEN, + aggregation=AssertionStdAggregation._NATIVE_, + ), + # column map expectations + "expect_column_values_to_not_be_null": DataHubStdAssertion( + scope=DatasetAssertionScope.DATASET_COLUMN, + operator=AssertionStdOperator.NOT_NULL, + aggregation=AssertionStdAggregation.IDENTITY, + ), + "expect_column_values_to_be_in_set": DataHubStdAssertion( + scope=DatasetAssertionScope.DATASET_COLUMN, + operator=AssertionStdOperator.IN, + 
aggregation=AssertionStdAggregation.IDENTITY, + parameters=AssertionStdParameters( + value=AssertionStdParameter( + value=convert_to_string(kwargs.get("value_set")), + type=AssertionStdParameterType.SET, + ) + ), + ), + "expect_column_values_to_be_between": DataHubStdAssertion( + scope=DatasetAssertionScope.DATASET_COLUMN, + operator=AssertionStdOperator.BETWEEN, + aggregation=AssertionStdAggregation.IDENTITY, + parameters=get_min_max(kwargs), + ), + "expect_column_values_to_match_regex": DataHubStdAssertion( + scope=DatasetAssertionScope.DATASET_COLUMN, + operator=AssertionStdOperator.REGEX_MATCH, + aggregation=AssertionStdAggregation.IDENTITY, + parameters=AssertionStdParameters( + value=AssertionStdParameter( + value=kwargs.get("regex"), + type=AssertionStdParameterType.STRING, + ) + ), + ), + "expect_column_values_to_match_regex_list": DataHubStdAssertion( + scope=DatasetAssertionScope.DATASET_COLUMN, + operator=AssertionStdOperator.REGEX_MATCH, + aggregation=AssertionStdAggregation.IDENTITY, + parameters=AssertionStdParameters( + value=AssertionStdParameter( + value=convert_to_string(kwargs.get("regex_list")), + type=AssertionStdParameterType.LIST, + ) + ), + ), + "expect_table_columns_to_match_ordered_list": DataHubStdAssertion( + scope=DatasetAssertionScope.DATASET_SCHEMA, + operator=AssertionStdOperator.EQUAL_TO, + aggregation=AssertionStdAggregation.COLUMNS, + parameters=AssertionStdParameters( + value=AssertionStdParameter( + value=convert_to_string(kwargs.get("column_list")), + type=AssertionStdParameterType.LIST, + ) + ), + ), + "expect_table_columns_to_match_set": DataHubStdAssertion( + scope=DatasetAssertionScope.DATASET_SCHEMA, + operator=AssertionStdOperator.EQUAL_TO, + aggregation=AssertionStdAggregation.COLUMNS, + parameters=AssertionStdParameters( + value=AssertionStdParameter( + value=convert_to_string(kwargs.get("column_set")), + type=AssertionStdParameterType.SET, + ) + ), + ), + "expect_table_column_count_to_be_between": DataHubStdAssertion( 
+ scope=DatasetAssertionScope.DATASET_SCHEMA, + operator=AssertionStdOperator.BETWEEN, + aggregation=AssertionStdAggregation.COLUMN_COUNT, + parameters=get_min_max(kwargs, AssertionStdParameterType.NUMBER), + ), + "expect_table_column_count_to_equal": DataHubStdAssertion( + scope=DatasetAssertionScope.DATASET_SCHEMA, + operator=AssertionStdOperator.EQUAL_TO, + aggregation=AssertionStdAggregation.COLUMN_COUNT, + parameters=AssertionStdParameters( + value=AssertionStdParameter( + value=convert_to_string(kwargs.get("value")), + type=AssertionStdParameterType.NUMBER, + ) + ), + ), + "expect_column_to_exist": DataHubStdAssertion( + scope=DatasetAssertionScope.DATASET_SCHEMA, + operator=AssertionStdOperator._NATIVE_, + aggregation=AssertionStdAggregation._NATIVE_, + ), + "expect_table_row_count_to_equal": DataHubStdAssertion( + scope=DatasetAssertionScope.DATASET_ROWS, + operator=AssertionStdOperator.EQUAL_TO, + aggregation=AssertionStdAggregation.ROW_COUNT, + parameters=AssertionStdParameters( + value=AssertionStdParameter( + value=convert_to_string(kwargs.get("value")), + type=AssertionStdParameterType.NUMBER, + ) + ), + ), + "expect_table_row_count_to_be_between": DataHubStdAssertion( + scope=DatasetAssertionScope.DATASET_ROWS, + operator=AssertionStdOperator.BETWEEN, + aggregation=AssertionStdAggregation.ROW_COUNT, + parameters=get_min_max(kwargs, AssertionStdParameterType.NUMBER), + ), + } + + datasetAssertionInfo = DatasetAssertionInfo( + dataset=dataset, + fields=fields, + operator=AssertionStdOperator._NATIVE_, + aggregation=AssertionStdAggregation._NATIVE_, + nativeType=expectation_type, + nativeParameters={k: convert_to_string(v) for k, v in kwargs.items()}, + scope=DatasetAssertionScope.DATASET_ROWS, + ) + + if expectation_type in known_expectations.keys(): + assertion = known_expectations[expectation_type] + datasetAssertionInfo.scope = assertion.scope + datasetAssertionInfo.aggregation = assertion.aggregation + datasetAssertionInfo.operator = 
assertion.operator + datasetAssertionInfo.parameters = assertion.parameters + + # Heuristically mapping other expectations + else: + if "column" in kwargs and expectation_type.startswith( + "expect_column_value" + ): + datasetAssertionInfo.scope = DatasetAssertionScope.DATASET_COLUMN + datasetAssertionInfo.aggregation = AssertionStdAggregation.IDENTITY + elif "column" in kwargs: + datasetAssertionInfo.scope = DatasetAssertionScope.DATASET_COLUMN + datasetAssertionInfo.aggregation = AssertionStdAggregation._NATIVE_ + + return AssertionInfo( + type=AssertionType.DATASET, + datasetAssertion=datasetAssertionInfo, + customProperties={"expectation_suite_name": expectation_suite_name}, + ) + + def get_dataset_partitions(self, batch_identifier, data_asset): + dataset_partitions = [] + + logger.debug("Finding datasets being validated") + + # for now, we support only v3-api and sqlalchemy execution engine and Pandas engine + is_sql_alchemy = isinstance(data_asset, Validator) and ( + isinstance(data_asset.execution_engine, SqlAlchemyExecutionEngine) + ) + is_pandas = isinstance(data_asset.execution_engine, PandasExecutionEngine) + if is_sql_alchemy or is_pandas: + ge_batch_spec = data_asset.active_batch_spec + partitionSpec = None + batchSpecProperties = { + "data_asset_name": str( + data_asset.active_batch_definition.data_asset_name + ), + "datasource_name": str( + data_asset.active_batch_definition.datasource_name + ), + } + sqlalchemy_uri = None + if is_sql_alchemy and isinstance( + data_asset.execution_engine.engine, Engine + ): + sqlalchemy_uri = data_asset.execution_engine.engine.url + # For snowflake sqlalchemy_execution_engine.engine is actually instance of Connection + elif is_sql_alchemy and isinstance( + data_asset.execution_engine.engine, Connection + ): + sqlalchemy_uri = data_asset.execution_engine.engine.engine.url + + if isinstance(ge_batch_spec, SqlAlchemyDatasourceBatchSpec): + # e.g. 
ConfiguredAssetSqlDataConnector with splitter_method or sampling_method + schema_name = ge_batch_spec.get("schema_name") + table_name = ge_batch_spec.get("table_name") + + dataset_urn = make_dataset_urn_from_sqlalchemy_uri( + sqlalchemy_uri, + schema_name, + table_name, + self.env, + self.get_platform_instance( + data_asset.active_batch_definition.datasource_name + ), + self.exclude_dbname, + self.platform_alias, + self.convert_urns_to_lowercase, + ) + batchSpec = BatchSpec( + nativeBatchId=batch_identifier, + customProperties=batchSpecProperties, + ) + + splitter_method = ge_batch_spec.get("splitter_method") + if ( + splitter_method is not None + and splitter_method != "_split_on_whole_table" + ): + batch_identifiers = ge_batch_spec.get("batch_identifiers", {}) + partitionSpec = PartitionSpecClass( + partition=convert_to_string(batch_identifiers) + ) + sampling_method = ge_batch_spec.get("sampling_method", "") + if sampling_method == "_sample_using_limit": + batchSpec.limit = ge_batch_spec["sampling_kwargs"]["n"] + + dataset_partitions.append( + { + "dataset_urn": dataset_urn, + "partitionSpec": partitionSpec, + "batchSpec": batchSpec, + } + ) + elif isinstance(ge_batch_spec, RuntimeQueryBatchSpec): + if not self.parse_table_names_from_sql: + warn( + "Enable parse_table_names_from_sql in DatahubValidationAction config\ + to try to parse the tables being asserted from SQL query" + ) + return [] + query = data_asset.batches[ + batch_identifier + ].batch_request.runtime_parameters["query"] + partitionSpec = PartitionSpecClass( + type=PartitionTypeClass.QUERY, + partition=f"Query_{builder.datahub_guid(pre_json_transform(query))}", + ) + + batchSpec = BatchSpec( + nativeBatchId=batch_identifier, + query=query, + customProperties=batchSpecProperties, + ) + try: + tables = DefaultSQLParser(query).get_tables() + except Exception as e: + logger.warning(f"Sql parser failed on {query} with {e}") + tables = [] + + if len(set(tables)) != 1: + warn( + "DataHubValidationAction 
does not support cross dataset assertions." + ) + return [] + for table in tables: + dataset_urn = make_dataset_urn_from_sqlalchemy_uri( + sqlalchemy_uri, + None, + table, + self.env, + self.get_platform_instance( + data_asset.active_batch_definition.datasource_name + ), + self.exclude_dbname, + self.platform_alias, + self.convert_urns_to_lowercase, + ) + dataset_partitions.append( + { + "dataset_urn": dataset_urn, + "partitionSpec": partitionSpec, + "batchSpec": batchSpec, + } + ) + elif isinstance(ge_batch_spec, RuntimeDataBatchSpec): + data_platform = self.get_platform_instance( + data_asset.active_batch_definition.datasource_name + ) + dataset_urn = builder.make_dataset_urn_with_platform_instance( + platform=( + data_platform + if self.platform_alias is None + else self.platform_alias + ), + name=data_asset.active_batch_definition.datasource_name, + platform_instance="", + env=self.env, + ) + batchSpec = BatchSpec( + nativeBatchId=batch_identifier, + query="", + customProperties=batchSpecProperties, + ) + dataset_partitions.append( + { + "dataset_urn": dataset_urn, + "partitionSpec": partitionSpec, + "batchSpec": batchSpec, + } + ) + else: + warn( + "DataHubValidationAction does not recognize this GE batch spec type- {batch_spec_type}.".format( + batch_spec_type=type(ge_batch_spec) + ) + ) + else: + # TODO - v2-spec - SqlAlchemyDataset support + warn( + "DataHubValidationAction does not recognize this GE data asset type - {asset_type}. 
This is either using v2-api or execution engine other than sqlalchemy.".format( + asset_type=type(data_asset) + ) + ) + + return dataset_partitions + + def get_platform_instance(self, datasource_name): + if self.platform_instance_map and datasource_name in self.platform_instance_map: + return self.platform_instance_map[datasource_name] + else: + warn( + f"Datasource {datasource_name} is not present in platform_instance_map" + ) + return None + + +def parse_int_or_default(value, default_value=None): + if value is None: + return default_value + else: + return int(value) + + +def make_dataset_urn_from_sqlalchemy_uri( + sqlalchemy_uri, + schema_name, + table_name, + env, + platform_instance=None, + exclude_dbname=None, + platform_alias=None, + convert_urns_to_lowercase=False, +): + data_platform = get_platform_from_sqlalchemy_uri(str(sqlalchemy_uri)) + url_instance = make_url(sqlalchemy_uri) + + if schema_name is None and "." in table_name: + schema_name, table_name = table_name.split(".")[-2:] + + if data_platform in ["redshift", "postgres"]: + schema_name = schema_name or "public" + if url_instance.database is None: + warn( + f"DataHubValidationAction failed to locate database name for {data_platform}." + ) + return None + schema_name = ( + schema_name if exclude_dbname else f"{url_instance.database}.{schema_name}" + ) + elif data_platform == "mssql": + schema_name = schema_name or "dbo" + if url_instance.database is None: + warn( + f"DataHubValidationAction failed to locate database name for {data_platform}." 
+ ) + return None + schema_name = ( + schema_name if exclude_dbname else f"{url_instance.database}.{schema_name}" + ) + elif data_platform in ["trino", "snowflake"]: + if schema_name is None or url_instance.database is None: + warn( + "DataHubValidationAction failed to locate schema name and/or database name for {data_platform}.".format( + data_platform=data_platform + ) + ) + return None + # If data platform is snowflake, we artificially lowercase the Database name. + # This is because DataHub also does this during ingestion. + # Ref: https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py#L155 + database_name = ( + url_instance.database.lower() + if data_platform == "snowflake" + else url_instance.database + ) + if database_name.endswith(f"/{schema_name}"): + database_name = database_name[: -len(f"/{schema_name}")] + schema_name = ( + schema_name if exclude_dbname else f"{database_name}.{schema_name}" + ) + + elif data_platform == "bigquery": + if url_instance.host is None or url_instance.database is None: + warn( + "DataHubValidationAction failed to locate host and/or database name for {data_platform}. ".format( + data_platform=data_platform + ) + ) + return None + schema_name = f"{url_instance.host}.{url_instance.database}" + + schema_name = schema_name or url_instance.database + if schema_name is None: + warn( + f"DataHubValidationAction failed to locate schema name for {data_platform}." 
+ ) + return None + + dataset_name = f"{schema_name}.{table_name}" + + if convert_urns_to_lowercase: + dataset_name = dataset_name.lower() + + dataset_urn = builder.make_dataset_urn_with_platform_instance( + platform=data_platform if platform_alias is None else platform_alias, + name=dataset_name, + platform_instance=platform_instance, + env=env, + ) + + return dataset_urn + + +@dataclass +class DataHubStdAssertion: + scope: Union[str, DatasetAssertionScope] + operator: Union[str, AssertionStdOperator] + aggregation: Union[str, AssertionStdAggregation] + parameters: Optional[AssertionStdParameters] = None + + +class DecimalEncoder(json.JSONEncoder): + def default(self, o): + if isinstance(o, Decimal): + return str(o) + return super().default(o) + + +def convert_to_string(var: Any) -> str: + try: + tmp = ( + str(var) + if isinstance(var, (str, int, float)) + else json.dumps(var, cls=DecimalEncoder) + ) + except TypeError as e: + logger.debug(e) + tmp = str(var) + return tmp + + +def warn(msg): + logger.warning(msg) diff --git a/metadata-ingestion/src/datahub/ingestion/source_config/usage/__init__.py b/metadata-ingestion-modules/gx-plugin/tests/__init__.py similarity index 100% rename from metadata-ingestion/src/datahub/ingestion/source_config/usage/__init__.py rename to metadata-ingestion-modules/gx-plugin/tests/__init__.py diff --git a/metadata-ingestion-modules/gx-plugin/tests/conftest.py b/metadata-ingestion-modules/gx-plugin/tests/conftest.py new file mode 100644 index 00000000000000..c99230fba30949 --- /dev/null +++ b/metadata-ingestion-modules/gx-plugin/tests/conftest.py @@ -0,0 +1 @@ +from datahub.testing.docker_utils import docker_compose_runner # noqa: F401 diff --git a/metadata-ingestion/tests/integration/great-expectations/docker-compose.yml b/metadata-ingestion-modules/gx-plugin/tests/integration/docker-compose.yml similarity index 100% rename from metadata-ingestion/tests/integration/great-expectations/docker-compose.yml rename to 
metadata-ingestion-modules/gx-plugin/tests/integration/docker-compose.yml diff --git a/metadata-ingestion/tests/integration/great-expectations/ge_mcps_golden.json b/metadata-ingestion-modules/gx-plugin/tests/integration/ge_mcps_golden.json similarity index 100% rename from metadata-ingestion/tests/integration/great-expectations/ge_mcps_golden.json rename to metadata-ingestion-modules/gx-plugin/tests/integration/ge_mcps_golden.json diff --git a/metadata-ingestion/tests/integration/great-expectations/ge_mcps_golden_2.json b/metadata-ingestion-modules/gx-plugin/tests/integration/ge_mcps_golden_2.json similarity index 100% rename from metadata-ingestion/tests/integration/great-expectations/ge_mcps_golden_2.json rename to metadata-ingestion-modules/gx-plugin/tests/integration/ge_mcps_golden_2.json diff --git a/metadata-ingestion/tests/integration/great-expectations/setup/great_expectations/checkpoints/test_checkpoint.yml b/metadata-ingestion-modules/gx-plugin/tests/integration/setup/great_expectations/checkpoints/test_checkpoint.yml similarity index 97% rename from metadata-ingestion/tests/integration/great-expectations/setup/great_expectations/checkpoints/test_checkpoint.yml rename to metadata-ingestion-modules/gx-plugin/tests/integration/setup/great_expectations/checkpoints/test_checkpoint.yml index 466cbfe39a4ab0..0e6fa886d57847 100644 --- a/metadata-ingestion/tests/integration/great-expectations/setup/great_expectations/checkpoints/test_checkpoint.yml +++ b/metadata-ingestion-modules/gx-plugin/tests/integration/setup/great_expectations/checkpoints/test_checkpoint.yml @@ -19,7 +19,7 @@ action_list: site_names: [] - name: datahub_action action: - module_name: datahub.integrations.great_expectations.action + module_name: datahub_gx_plugin.action class_name: DataHubValidationAction server_url: http://localhost:8080 graceful_exceptions: False diff --git a/metadata-ingestion/tests/integration/great-expectations/setup/great_expectations/checkpoints/test_checkpoint_2.yml 
b/metadata-ingestion-modules/gx-plugin/tests/integration/setup/great_expectations/checkpoints/test_checkpoint_2.yml similarity index 97% rename from metadata-ingestion/tests/integration/great-expectations/setup/great_expectations/checkpoints/test_checkpoint_2.yml rename to metadata-ingestion-modules/gx-plugin/tests/integration/setup/great_expectations/checkpoints/test_checkpoint_2.yml index 409d93f64db160..d0fa2a8c179920 100644 --- a/metadata-ingestion/tests/integration/great-expectations/setup/great_expectations/checkpoints/test_checkpoint_2.yml +++ b/metadata-ingestion-modules/gx-plugin/tests/integration/setup/great_expectations/checkpoints/test_checkpoint_2.yml @@ -19,7 +19,7 @@ action_list: site_names: [] - name: datahub_action action: - module_name: datahub.integrations.great_expectations.action + module_name: datahub_gx_plugin.action class_name: DataHubValidationAction server_url: http://localhost:8080 graceful_exceptions: False diff --git a/metadata-ingestion/tests/integration/great-expectations/setup/great_expectations/expectations/.ge_store_backend_id b/metadata-ingestion-modules/gx-plugin/tests/integration/setup/great_expectations/expectations/.ge_store_backend_id similarity index 100% rename from metadata-ingestion/tests/integration/great-expectations/setup/great_expectations/expectations/.ge_store_backend_id rename to metadata-ingestion-modules/gx-plugin/tests/integration/setup/great_expectations/expectations/.ge_store_backend_id diff --git a/metadata-ingestion/tests/integration/great-expectations/setup/great_expectations/expectations/test_suite.json b/metadata-ingestion-modules/gx-plugin/tests/integration/setup/great_expectations/expectations/test_suite.json similarity index 100% rename from metadata-ingestion/tests/integration/great-expectations/setup/great_expectations/expectations/test_suite.json rename to metadata-ingestion-modules/gx-plugin/tests/integration/setup/great_expectations/expectations/test_suite.json diff --git 
a/metadata-ingestion/tests/integration/great-expectations/setup/great_expectations/great_expectations.yml b/metadata-ingestion-modules/gx-plugin/tests/integration/setup/great_expectations/great_expectations.yml similarity index 100% rename from metadata-ingestion/tests/integration/great-expectations/setup/great_expectations/great_expectations.yml rename to metadata-ingestion-modules/gx-plugin/tests/integration/setup/great_expectations/great_expectations.yml diff --git a/metadata-ingestion/tests/integration/great-expectations/setup/great_expectations/plugins/custom_data_docs/styles/data_docs_custom_styles.css b/metadata-ingestion-modules/gx-plugin/tests/integration/setup/great_expectations/plugins/custom_data_docs/styles/data_docs_custom_styles.css similarity index 100% rename from metadata-ingestion/tests/integration/great-expectations/setup/great_expectations/plugins/custom_data_docs/styles/data_docs_custom_styles.css rename to metadata-ingestion-modules/gx-plugin/tests/integration/setup/great_expectations/plugins/custom_data_docs/styles/data_docs_custom_styles.css diff --git a/metadata-ingestion/tests/integration/great-expectations/setup/setup.sql b/metadata-ingestion-modules/gx-plugin/tests/integration/setup/setup.sql similarity index 100% rename from metadata-ingestion/tests/integration/great-expectations/setup/setup.sql rename to metadata-ingestion-modules/gx-plugin/tests/integration/setup/setup.sql diff --git a/metadata-ingestion/tests/integration/great-expectations/test_great_expectations.py b/metadata-ingestion-modules/gx-plugin/tests/integration/test_great_expectations.py similarity index 68% rename from metadata-ingestion/tests/integration/great-expectations/test_great_expectations.py rename to metadata-ingestion-modules/gx-plugin/tests/integration/test_great_expectations.py index 0bb87b993e6b06..b03681dc780584 100644 --- a/metadata-ingestion/tests/integration/great-expectations/test_great_expectations.py +++ 
b/metadata-ingestion-modules/gx-plugin/tests/integration/test_great_expectations.py @@ -1,17 +1,30 @@ +import os import shutil from typing import List from unittest import mock +import packaging.version import pytest -from freezegun import freeze_time -from great_expectations.data_context.data_context.file_data_context import ( - FileDataContext, -) - from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.sink.file import write_metadata_file -from tests.test_helpers import mce_helpers -from tests.test_helpers.docker_helpers import wait_for_port +from datahub.testing.compare_metadata_json import assert_metadata_files_equal +from datahub.testing.docker_utils import wait_for_port +from freezegun import freeze_time +from great_expectations.data_context import FileDataContext + +try: + from great_expectations import __version__ as GX_VERSION # type: ignore + + use_gx_folder = packaging.version.parse(GX_VERSION) > packaging.version.Version( + "0.17.0" + ) +except Exception: + use_gx_folder = False + + +def should_update_golden_file() -> bool: + return bool(os.getenv("DATAHUB_GOLDEN_FILE_UPDATE", False)) + FROZEN_TIME = "2021-12-28 12:00:00" @@ -40,12 +53,11 @@ def test_ge_ingest( docker_compose_runner, pytestconfig, tmp_path, - mock_time, checkpoint, golden_json, **kwargs, ): - test_resources_dir = pytestconfig.rootpath / "tests/integration/great-expectations" + test_resources_dir = pytestconfig.rootpath / "tests/integration" with docker_compose_runner( test_resources_dir / "docker-compose.yml", "great-expectations" @@ -57,18 +69,21 @@ def test_ge_ingest( emitter = MockDatahubEmitter("") mock_emit_mcp.side_effect = emitter.emit_mcp + gx_context_folder_name = "gx" if use_gx_folder else "great_expectations" shutil.copytree( test_resources_dir / "setup/great_expectations", - tmp_path / "great_expectations", + tmp_path / gx_context_folder_name, ) + context = FileDataContext.create(tmp_path) context.run_checkpoint(checkpoint_name=checkpoint) 
emitter.write_to_file(tmp_path / "ge_mcps.json") - mce_helpers.check_golden_file( - pytestconfig, + assert_metadata_files_equal( output_path=tmp_path / "ge_mcps.json", golden_path=test_resources_dir / golden_json, + copy_output=False, + update_golden=should_update_golden_file(), ignore_paths=[], ) diff --git a/metadata-ingestion/tests/unit/test_great_expectations_action.py b/metadata-ingestion-modules/gx-plugin/tests/unit/test_great_expectations_action.py similarity index 98% rename from metadata-ingestion/tests/unit/test_great_expectations_action.py rename to metadata-ingestion-modules/gx-plugin/tests/unit/test_great_expectations_action.py index 2e23949d296893..c870a4449abea1 100644 --- a/metadata-ingestion/tests/unit/test_great_expectations_action.py +++ b/metadata-ingestion-modules/gx-plugin/tests/unit/test_great_expectations_action.py @@ -4,6 +4,22 @@ import pandas as pd import pytest +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.metadata.schema_classes import ( + AssertionInfoClass, + AssertionResultClass, + AssertionResultTypeClass, + AssertionRunEventClass, + AssertionRunStatusClass, + AssertionStdParameterClass, + AssertionStdParametersClass, + AssertionTypeClass, + BatchSpecClass, + DataPlatformInstanceClass, + DatasetAssertionInfoClass, + DatasetAssertionScopeClass, + PartitionSpecClass, +) from great_expectations.core.batch import Batch, BatchDefinition, BatchRequest from great_expectations.core.batch_spec import ( RuntimeDataBatchSpec, @@ -14,10 +30,7 @@ ) from great_expectations.core.id_dict import IDDict from great_expectations.core.run_identifier import RunIdentifier -from great_expectations.data_context import DataContext -from great_expectations.data_context.data_context.file_data_context import ( - FileDataContext, -) +from great_expectations.data_context import DataContext, FileDataContext from great_expectations.data_context.types.resource_identifiers import ( ExpectationSuiteIdentifier, ValidationResultIdentifier, 
@@ -33,23 +46,7 @@ ) from great_expectations.validator.validator import Validator -from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.integrations.great_expectations.action import DataHubValidationAction -from datahub.metadata.schema_classes import ( - AssertionInfoClass, - AssertionResultClass, - AssertionResultTypeClass, - AssertionRunEventClass, - AssertionRunStatusClass, - AssertionStdParameterClass, - AssertionStdParametersClass, - AssertionTypeClass, - BatchSpecClass, - DataPlatformInstanceClass, - DatasetAssertionInfoClass, - DatasetAssertionScopeClass, - PartitionSpecClass, -) +from datahub_gx_plugin.action import DataHubValidationAction logger = logging.getLogger(__name__) diff --git a/metadata-ingestion/developing.md b/metadata-ingestion/developing.md index e0dbc7c8d4b145..b37c4e5ad96738 100644 --- a/metadata-ingestion/developing.md +++ b/metadata-ingestion/developing.md @@ -68,6 +68,18 @@ cd metadata-ingestion-modules/dagster-plugin source venv/bin/activate datahub version # should print "DataHub CLI version: unavailable (installed in develop mode)" ``` + +### (Optional) Set up your Python environment for developing on GX Plugin + +From the repository root: + +```shell +cd metadata-ingestion-modules/gx-plugin +../../gradlew :metadata-ingestion-modules:gx-plugin:installDev +source venv/bin/activate +datahub version # should print "DataHub CLI version: unavailable (installed in develop mode)" +``` + ### Common setup issues Common issues (click to expand): diff --git a/metadata-ingestion/docs/sources/bigquery/bigquery_recipe.yml b/metadata-ingestion/docs/sources/bigquery/bigquery_recipe.yml index c91aef929889ee..2efa1e5513caa6 100644 --- a/metadata-ingestion/docs/sources/bigquery/bigquery_recipe.yml +++ b/metadata-ingestion/docs/sources/bigquery/bigquery_recipe.yml @@ -1,8 +1,7 @@ source: type: bigquery config: - # `schema_pattern` for BQ Datasets - schema_pattern: + dataset_pattern: allow: - finance_bq_dataset table_pattern: diff 
--git a/metadata-ingestion/docs/sources/dbt/dbt.md b/metadata-ingestion/docs/sources/dbt/dbt.md index eca5101e006426..2333ddcee677bd 100644 --- a/metadata-ingestion/docs/sources/dbt/dbt.md +++ b/metadata-ingestion/docs/sources/dbt/dbt.md @@ -62,7 +62,11 @@ We support the following operations: 1. add_tag - Requires `tag` property in config. 2. add_term - Requires `term` property in config. 3. add_terms - Accepts an optional `separator` property in config. -4. add_owner - Requires `owner_type` property in config which can be either user or group. Optionally accepts the `owner_category` config property which can be set to either a [custom ownership type](../../../../docs/ownership/ownership-types.md) urn like `urn:li:ownershipType:architect` or one of `['TECHNICAL_OWNER', 'BUSINESS_OWNER', 'DATA_STEWARD', 'DATAOWNER'` (defaults to `DATAOWNER`). +4. add_owner - Requires `owner_type` property in config which can be either `user` or `group`. Optionally accepts the `owner_category` config property which can be set to either a [custom ownership type](../../../../docs/ownership/ownership-types.md) urn like `urn:li:ownershipType:architect` or one of `['TECHNICAL_OWNER', 'BUSINESS_OWNER', 'DATA_STEWARD', 'DATAOWNER']` (defaults to `DATAOWNER`). + + - The `owner_type` property will be ignored if the owner is a fully qualified urn. + - You can use commas to specify multiple owners - e.g. `business_owner: "jane,john,urn:li:corpGroup:data-team"`. + 5. add_doc_link - Requires `link` and `description` properties in config. Upon ingestion run, this will overwrite current links in the institutional knowledge section with this new link. The anchor text is defined here in the meta_mappings as `description`.
Note: diff --git a/metadata-ingestion/docs/sources/s3/s3.md b/metadata-ingestion/docs/sources/s3/s3.md index 9484cd8de6666e..90a2c30882d529 100644 --- a/metadata-ingestion/docs/sources/s3/s3.md +++ b/metadata-ingestion/docs/sources/s3/s3.md @@ -3,16 +3,31 @@ Path Specs (`path_specs`) is a list of Path Spec (`path_spec`) objects where each individual `path_spec` represents one or more datasets. Include path (`path_spec.include`) represents formatted path to the dataset. This path must end with `*.*` or `*.[ext]` to represent leaf level. If `*.[ext]` is provided then files with only specified extension type will be scanned. "`.[ext]`" can be any of [supported file types](#supported-file-types). Refer [example 1](#example-1---individual-file-as-dataset) below for more details. -All folder levels need to be specified in include path. You can use `/*/` to represent a folder level and avoid specifying exact folder name. To map folder as a dataset, use `{table}` placeholder to represent folder level for which dataset is to be created. For a partitioned dataset, you can use placeholder `{partition_key[i]}` to represent name of `i`th partition and `{partition[i]}` to represent value of `i`th partition. During ingestion, `i` will be used to match partition_key to partition. Refer [example 2 and 3](#example-2---folder-of-files-as-dataset-without-partitions) below for more details. +All folder levels need to be specified in include path. You can use `/*/` to represent a folder level and avoid specifying exact folder name. To map folder as a dataset, use `{table}` placeholder to represent folder level for which dataset is to be created. For a partitioned dataset, you can use placeholder `{partition_key[i]}` to represent name of `i`th partition and `{partition_value[i]}` to represent value of `i`th partition. During ingestion, `i` will be used to match partition_key to partition. 
Refer [example 2 and 3](#example-2---folder-of-files-as-dataset-without-partitions) below for more details. Exclude paths (`path_spec.exclude`) can be used to ignore paths that are not relevant to current `path_spec`. This path cannot have named variables ( `{}` ). Exclude path can have `**` to represent multiple folder levels. Refer [example 4](#example-4---folder-of-files-as-dataset-with-partitions-and-exclude-filter) below for more details. Refer [example 5](#example-5---advanced---either-individual-file-or-folder-of-files-as-dataset) if your bucket has more complex dataset representation. + **Additional points to note** - Folder names should not contain {, }, *, / in their names. - Named variable {folder} is reserved for internal working. please do not use in named variables. +#### Partitioned Dataset support +If your dataset is partitioned by the `partition_key`=`partition_value` format, then the partition values are auto-detected. + +Otherwise, you can specify partitions in the following way in the path_spec: +1. Specify partition_key and partition_value in the path like => `{partition_key[0]}={partition_value[0]}/{partition_key[1]}={partition_value[1]}/{partition_key[2]}={partition_value[2]}` +2. Partition keys can be specified using named variables in the path_spec like => `year={year}/month={month}/day={day}` +3. If the path is in the form of /value1/value2/value3, the source infers the partition values from the path and assigns them to partition_0, partition_1, partition_2, etc. + +Dataset creation time is determined by the creation time of the earliest created file in the lowest partition while last updated time is determined by the last updated time of the latest updated file in the highest partition. + +How the source determines the highest/lowest partition is based on the traversal method set in the path_spec. +- If the traversal method is set to `MAX` then the source will try to find the latest partition by ordering the partitions at each level and finding the latest partition.
This traversal method won't look for the earliest partition/creation time but this is the fastest. +- If the traversal method is set to `MIN_MAX` then the source will try to find the latest and earliest partition by ordering the partitions at each level and finding the latest/earliest partition. This traversal sorts folders purely by name; therefore it is fast, but it doesn't guarantee the latest partition will have the latest created file. +- If the traversal method is set to `ALL` then the source will try to find the latest and earliest partition by listing all the files in all the partitions and finding the creation/last modification time based on the file creations. This is the slowest, but for non-time-partitioned datasets this is the only way to find the latest/earliest partition. ### Path Specs - Examples #### Example 1 - Individual file as Dataset @@ -73,7 +88,12 @@ test-bucket Path specs config to ingest folders `orders` and `returns` as datasets: ``` path_specs: - - include: s3://test-bucket/{table}/{partition_key[0]}={partition[0]}/{partition_key[1]}={partition[1]}/*.parquet + - include: s3://test-bucket/{table}/{partition_key[0]}={partition_value[0]}/{partition_key[1]}={partition_value[1]}/*.parquet +``` +or with partition auto-detection: +``` +path_specs: + - include: s3://test-bucket/{table}/ ``` One can also use `include: s3://test-bucket/{table}/*/*/*.parquet` here however above format is preferred as it allows declaring partitions explicitly.
@@ -99,11 +119,15 @@ test-bucket Path specs config to ingest folder `orders` as dataset but not folder `tmp_orders`: ``` path_specs: - - include: s3://test-bucket/{table}/{partition_key[0]}={partition[0]}/{partition_key[1]}={partition[1]}/*.parquet + - include: s3://test-bucket/{table}/{partition_key[0]}={partition_value[0]}/{partition_key[1]}={partition_value[1]}/*.parquet exclude: - **/tmp_orders/** ``` - +or with partition auto-detection: +``` +path_specs: + - include: s3://test-bucket/{table}/ +``` #### Example 5 - Advanced - Either Individual file OR Folder of files as Dataset @@ -150,6 +174,7 @@ Above config has 3 path_specs and will ingest following datasets s3://my-bucket/foo/tests/bar.avro # single file table s3://my-bucket/foo/tests/*.* # mulitple file level tables s3://my-bucket/foo/tests/{table}/*.avro #table without partition +s3://my-bucket/foo/tests/{table}/ #table with partition autodetection. Partition only can be detected if it is in the format of key=value s3://my-bucket/foo/tests/{table}/*/*.avro #table where partitions are not specified s3://my-bucket/foo/tests/{table}/*.* # table where no partitions as well as data type specified s3://my-bucket/{dept}/tests/{table}/*.avro # specifying keywords to be used in display name diff --git a/metadata-ingestion/docs/transformer/dataset_transformer.md b/metadata-ingestion/docs/transformer/dataset_transformer.md index ac6fefc3095741..03a224bcf7da47 100644 --- a/metadata-ingestion/docs/transformer/dataset_transformer.md +++ b/metadata-ingestion/docs/transformer/dataset_transformer.md @@ -1207,20 +1207,51 @@ The config, which we’d append to our ingestion recipe YAML, would look like th | Field | Required | Type | Default | Description | |---------------------------------------|----------|----------------------|-------------|---------------------------------------------------------------------------------------------| | `dataset_to_data_product_urns_pattern`| ✅ | map[regx, urn] | | Dataset Entity urn with 
regular expression and dataproduct urn apply to matching entity urn.| +| `is_container` | | bool | `false` | Whether to also consider a container or not. If true, the data product will be attached to both the dataset and its container. | -Let’s suppose we’d like to append a series of dataproducts with specific datasets as its assets. To do so, we can use the `pattern_add_dataset_dataproduct` module that’s included in the ingestion framework. This will match the regex pattern to `urn` of the dataset and create the data product entity with given urn and matched datasets as its assets. + +Let’s suppose we’d like to append a series of data products with specific datasets or their containers as assets. To do so, we can use the pattern_add_dataset_dataproduct module that’s included in the ingestion framework. This module matches a regex pattern to the urn of the dataset and creates a data product entity with the given urn, associating the matched datasets as its assets. + +If the is_container field is set to true, the module will not only attach the data product to the matching datasets but will also find and attach the containers associated with those datasets. This means that both the datasets and their containers will be associated with the specified data product. 
The config, which we’d append to our ingestion recipe YAML, would look like this: +- Add Product to dataset + ```yaml + transformers: + - type: "pattern_add_dataset_dataproduct" + config: + dataset_to_data_product_urns_pattern: + rules: + ".*example1.*": "urn:li:dataProduct:first" + ".*example2.*": "urn:li:dataProduct:second" + ``` +- Add Product to dataset container ```yaml transformers: - type: "pattern_add_dataset_dataproduct" config: + is_container: true dataset_to_data_product_urns_pattern: rules: ".*example1.*": "urn:li:dataProduct:first" ".*example2.*": "urn:li:dataProduct:second" ``` +⚠️ Warning: +When working with two datasets in the same container but with different data products, only one data product can be attached to the container. + +For example: +```yaml +transformers: + - type: "pattern_add_dataset_dataproduct" + config: + is_container: true + dataset_to_data_product_urns_pattern: + rules: + ".*example1.*": "urn:li:dataProduct:first" + ".*example2.*": "urn:li:dataProduct:second" +``` +If example1 and example2 are in the same container, only urn:li:dataProduct:first will be added. However, if they are in separate containers, the system works as expected and assigns the correct data product URNs. 
## Add Dataset dataProduct ### Config Details diff --git a/metadata-ingestion/examples/mce_files/bootstrap_mce.json b/metadata-ingestion/examples/mce_files/bootstrap_mce.json index fbe6b9953cb4fa..bc218e5e8c2d53 100644 --- a/metadata-ingestion/examples/mce_files/bootstrap_mce.json +++ b/metadata-ingestion/examples/mce_files/bootstrap_mce.json @@ -3394,7 +3394,7 @@ "changeType":"UPSERT", "aspectName":"datasetProfile", "aspect":{ - "value":"{\"timestampMillis\": 1679515693000, \"rowCount\": 4500, \"columnCount\": 2, \"sizeInBytes\": 842000200000, \"fieldProfiles\": [{\"fieldPath\": \"field_foo\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00044, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"true\", \"false\"]}, {\"fieldPath\": \"field_bar\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00044, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"false\"]}]}", + "value":"{\"timestampMillis\": 1723488954865, \"rowCount\": 4500, \"columnCount\": 2, \"sizeInBytes\": 842000200000, \"fieldProfiles\": [{\"fieldPath\": \"field_foo\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00044, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"true\", \"false\"]}, {\"fieldPath\": \"field_bar\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00044, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"false\"]}]}", "contentType":"application/json" }, "systemMetadata":null @@ -3406,7 +3406,7 @@ "changeType":"UPSERT", "aspectName":"datasetProfile", "aspect":{ - "value":"{\"timestampMillis\": 1684786093000, \"rowCount\": 3500, \"columnCount\": 2, \"fieldProfiles\": [{\"fieldPath\": \"field_foo\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00057, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"true\", \"false\"]}, {\"fieldPath\": \"field_bar\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00057, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"true\"]}]}", + "value":"{\"timestampMillis\": 1723488954865,
\"rowCount\": 3500, \"columnCount\": 2, \"fieldProfiles\": [{\"fieldPath\": \"field_foo\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00057, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"true\", \"false\"]}, {\"fieldPath\": \"field_bar\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00057, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"true\"]}]}", "contentType":"application/json" }, "systemMetadata":null diff --git a/metadata-ingestion/integration_docs/great-expectations.md b/metadata-ingestion/integration_docs/great-expectations.md index 80f5bedf42661a..9a4097a8f3af35 100644 --- a/metadata-ingestion/integration_docs/great-expectations.md +++ b/metadata-ingestion/integration_docs/great-expectations.md @@ -23,7 +23,7 @@ This integration does not support 1. Install the required dependency in your Great Expectations environment. ```shell - pip install 'acryl-datahub[great-expectations]' + pip install 'acryl-datahub-gx-plugin' ``` @@ -32,7 +32,7 @@ This integration does not support action_list: - name: datahub_action action: - module_name: datahub.integrations.great_expectations.action + module_name: datahub_gx_plugin.action class_name: DataHubValidationAction server_url: http://localhost:8080 #datahub server url ``` diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index abb716d2434ac6..e945cb38367df6 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -8,7 +8,9 @@ _version: str = package_metadata["__version__"] _self_pin = ( - f"=={_version}" if not (_version.endswith("dev0") or "docker" in _version) else "" + f"=={_version}" + if not (_version.endswith(("dev0", "dev1")) or "docker" in _version) + else "" ) base_requirements = { @@ -173,6 +175,7 @@ *sqlglot_lib, "GitPython>2", "python-liquid", + "deepmerge>=1.1.1", } bigquery_common = { @@ -180,6 +183,7 @@ "google-cloud-logging<=3.5.0", "google-cloud-bigquery", "google-cloud-datacatalog>=1.5.0", +
"google-cloud-resource-manager", "more-itertools>=8.12.0", "sqlalchemy-bigquery>=1.4.1", } @@ -234,7 +238,7 @@ # Instead, we put the fix in our PyHive fork, so no thrift pin is needed. } -microsoft_common = {"msal==1.22.0"} +microsoft_common = {"msal>=1.22.0"} iceberg_common = { # Iceberg Python SDK @@ -267,7 +271,7 @@ abs_base = { "azure-core==1.29.4", - "azure-identity>=1.14.0", + "azure-identity>=1.17.1", "azure-storage-blob>=12.19.0", "azure-storage-file-datalake>=12.14.0", "more-itertools>=8.12.0", @@ -297,7 +301,7 @@ databricks = { # 0.1.11 appears to have authentication issues with azure databricks - "databricks-sdk>=0.9.0", + "databricks-sdk>=0.30.0", "pyspark~=3.3.0", "requests", # Version 2.4.0 includes sqlalchemy dialect, 2.8.0 includes some bug fixes @@ -330,7 +334,9 @@ "gql[requests]>=3.3.0", }, "datahub": mysql | kafka_common, - "great-expectations": sql_common | sqllineage_lib, + "great-expectations": { + f"acryl-datahub-gx-plugin{_self_pin}", + }, # Misc plugins. "sql-parser": sqlglot_lib, # Source plugins @@ -353,6 +359,7 @@ "google-cloud-datacatalog-lineage==0.2.2", } | classification_lib, + "bigquery-queries": sql_common | bigquery_common | sqlglot_lib, "clickhouse": sql_common | clickhouse_common, "clickhouse-usage": sql_common | usage_common | clickhouse_common, "cockroachdb": sql_common | postgres_common | {"sqlalchemy-cockroachdb<2.0.0"}, @@ -480,6 +487,9 @@ # The Airflow extra is only retained for compatibility, but new users should # be using the datahub-airflow-plugin package instead. "airflow", + # The great-expectations extra is only retained for compatibility, but new users should + # be using the datahub-gx-plugin package instead. + "great-expectations", # SQL Server ODBC requires additional drivers, and so we don't want to keep # it included in the default "all" installation. 
"mssql-odbc", @@ -525,9 +535,12 @@ } -pytest_dep = "pytest>=6.2.2" -deepdiff_dep = "deepdiff" -test_api_requirements = {pytest_dep, deepdiff_dep, "PyYAML"} +test_api_requirements = { + "pytest>=6.2.2", + "deepdiff", + "PyYAML", + "pytest-docker>=1.1.0", +} debug_requirements = { "memray", @@ -549,12 +562,9 @@ "isort>=5.7.0", "mypy==1.10.1", *test_api_requirements, - pytest_dep, "pytest-asyncio>=0.16.0", "pytest-cov>=2.8.1", - "pytest-docker>=1.1.0", "pytest-random-order~=1.1.0", - deepdiff_dep, "requests-mock", "freezegun", "jsonpickle", @@ -588,7 +598,6 @@ "kafka", "datahub-rest", "datahub-lite", - "great-expectations", "presto", "redash", "redshift", @@ -660,6 +669,7 @@ "athena = datahub.ingestion.source.sql.athena:AthenaSource", "azure-ad = datahub.ingestion.source.identity.azure_ad:AzureADSource", "bigquery = datahub.ingestion.source.bigquery_v2.bigquery:BigqueryV2Source", + "bigquery-queries = datahub.ingestion.source.bigquery_v2.bigquery_queries:BigQueryQueriesSource", "clickhouse = datahub.ingestion.source.sql.clickhouse:ClickHouseSource", "clickhouse-usage = datahub.ingestion.source.usage.clickhouse_usage:ClickHouseUsageSource", "cockroachdb = datahub.ingestion.source.sql.cockroachdb:CockroachDBSource", diff --git a/metadata-ingestion/src/datahub/configuration/source_common.py b/metadata-ingestion/src/datahub/configuration/source_common.py index 6a663a790c6c2b..7160aa6fc339d3 100644 --- a/metadata-ingestion/src/datahub/configuration/source_common.py +++ b/metadata-ingestion/src/datahub/configuration/source_common.py @@ -21,7 +21,9 @@ class PlatformInstanceConfigMixin(ConfigModel): platform_instance: Optional[str] = Field( default=None, - description="The instance of the platform that all assets produced by this recipe belong to", + description="The instance of the platform that all assets produced by this recipe belong to. " + "This should be unique within the platform. 
" + "See https://datahubproject.io/docs/platform-instances/ for more details.", ) diff --git a/metadata-ingestion/src/datahub/emitter/mce_builder.py b/metadata-ingestion/src/datahub/emitter/mce_builder.py index a0694983660e42..df769f35b4778e 100644 --- a/metadata-ingestion/src/datahub/emitter/mce_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mce_builder.py @@ -50,6 +50,7 @@ UpstreamLineageClass, _Aspect as AspectAbstract, ) +from datahub.metadata.urns import CorpGroupUrn, CorpUserUrn from datahub.utilities.urn_encoder import UrnEncoder from datahub.utilities.urns.data_flow_urn import DataFlowUrn from datahub.utilities.urns.dataset_urn import DatasetUrn @@ -224,6 +225,21 @@ def make_user_urn(username: str) -> str: ) +def make_actor_urn(actor: str) -> Union[CorpUserUrn, CorpGroupUrn]: + """ + Makes a user urn if the input is not a user or group urn already + """ + return ( + CorpUserUrn(actor) + if not actor.startswith(("urn:li:corpuser:", "urn:li:corpGroup:")) + else ( + CorpUserUrn.from_string(actor) + if actor.startswith("urn:li:corpuser:") + else CorpGroupUrn.from_string(actor) + ) + ) + + def make_group_urn(groupname: str) -> str: """ Makes a group urn if the input is not a user or group urn already @@ -244,6 +260,12 @@ def make_tag_urn(tag: str) -> str: def make_owner_urn(owner: str, owner_type: OwnerType) -> str: + if owner_type == OwnerType.USER: + return make_user_urn(owner) + elif owner_type == OwnerType.GROUP: + return make_group_urn(owner) + # This should pretty much never happen. + # TODO: With Python 3.11, we can use typing.assert_never() here. 
return f"urn:li:{owner_type.value}:{owner}" diff --git a/metadata-ingestion/src/datahub/ingestion/glossary/classification_mixin.py b/metadata-ingestion/src/datahub/ingestion/glossary/classification_mixin.py index 3e5eb4347b474b..1d381acbf3dbe9 100644 --- a/metadata-ingestion/src/datahub/ingestion/glossary/classification_mixin.py +++ b/metadata-ingestion/src/datahub/ingestion/glossary/classification_mixin.py @@ -48,7 +48,7 @@ class ClassificationReportMixin: class ClassificationSourceConfigMixin(ConfigModel): classification: ClassificationConfig = Field( default=ClassificationConfig(), - description="For details, refer [Classification](../../../../metadata-ingestion/docs/dev_guides/classification.md).", + description="For details, refer to [Classification](../../../../metadata-ingestion/docs/dev_guides/classification.md).", ) diff --git a/metadata-ingestion/src/datahub/ingestion/graph/filters.py b/metadata-ingestion/src/datahub/ingestion/graph/filters.py index 8974f159171d1e..edb45fa5c2dbc1 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/filters.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/filters.py @@ -110,6 +110,10 @@ def _get_env_filters(env: str) -> List[SearchFilterRule]: "field": "customProperties", "value": f"instance={env}", }, + { + "field": "env", + "value": env, + } # Note that not all entity types have an env (e.g. dashboards / charts). # If the env filter is specified, these will be excluded. 
] diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/aws_common.py b/metadata-ingestion/src/datahub/ingestion/source/aws/aws_common.py index ba358d2465bbc9..6eb02fe80552ec 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/aws_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/aws_common.py @@ -1,3 +1,4 @@ +from datetime import datetime, timedelta, timezone from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union import boto3 @@ -73,6 +74,8 @@ class AwsConnectionConfig(ConfigModel): - dbt source """ + _credentials_expiration: Optional[datetime] = None + aws_access_key_id: Optional[str] = Field( default=None, description=f"AWS access key ID. {AUTODETECT_CREDENTIALS_DOC_LINK}", @@ -115,6 +118,11 @@ class AwsConnectionConfig(ConfigModel): description="Advanced AWS configuration options. These are passed directly to [botocore.config.Config](https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html).", ) + def allowed_cred_refresh(self) -> bool: + if self._normalized_aws_roles(): + return True + return False + def _normalized_aws_roles(self) -> List[AwsAssumeRoleConfig]: if not self.aws_role: return [] @@ -153,11 +161,14 @@ def get_session(self) -> Session: } for role in self._normalized_aws_roles(): - credentials = assume_role( - role, - self.aws_region, - credentials=credentials, - ) + if self._should_refresh_credentials(): + credentials = assume_role( + role, + self.aws_region, + credentials=credentials, + ) + if isinstance(credentials["Expiration"], datetime): + self._credentials_expiration = credentials["Expiration"] session = Session( aws_access_key_id=credentials["AccessKeyId"], @@ -168,6 +179,12 @@ def get_session(self) -> Session: return session + def _should_refresh_credentials(self) -> bool: + if self._credentials_expiration is None: + return True + remaining_time = self._credentials_expiration - datetime.now(timezone.utc) + return remaining_time < timedelta(minutes=5) + def 
get_credentials(self) -> Dict[str, Optional[str]]: credentials = self.get_session().get_credentials() if credentials is not None: diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker.py index acbc6eb9a0e442..b63fa57f069b5b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker.py @@ -1,5 +1,5 @@ from collections import defaultdict -from typing import DefaultDict, Dict, Iterable, List, Optional +from typing import TYPE_CHECKING, DefaultDict, Dict, Iterable, List, Optional from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( @@ -33,6 +33,9 @@ StatefulIngestionSourceBase, ) +if TYPE_CHECKING: + from mypy_boto3_sagemaker import SageMakerClient + @platform_name("SageMaker") @config_class(SagemakerSourceConfig) @@ -56,6 +59,7 @@ def __init__(self, config: SagemakerSourceConfig, ctx: PipelineContext): self.report = SagemakerSourceReport() self.sagemaker_client = config.sagemaker_client self.env = config.env + self.client_factory = ClientFactory(config) @classmethod def create(cls, config_dict, ctx): @@ -92,7 +96,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: # extract jobs if specified if self.source_config.extract_jobs is not False: job_processor = JobProcessor( - sagemaker_client=self.sagemaker_client, + sagemaker_client=self.client_factory.get_client, env=self.env, report=self.report, job_type_filter=self.source_config.extract_jobs, @@ -118,3 +122,15 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: def get_report(self): return self.report + + +class ClientFactory: + def __init__(self, config: SagemakerSourceConfig): + self.config = config + self._cached_client = self.config.sagemaker_client + + def get_client(self) -> "SageMakerClient": + if self.config.allowed_cred_refresh(): + # Always fetch the 
client dynamically with auto-refresh logic + return self.config.sagemaker_client + return self._cached_client diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/jobs.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/jobs.py index a1a5a008842372..73a83295ec8cba 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/jobs.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/jobs.py @@ -4,6 +4,7 @@ from typing import ( TYPE_CHECKING, Any, + Callable, DefaultDict, Dict, Iterable, @@ -147,7 +148,7 @@ class JobProcessor: """ # boto3 SageMaker client - sagemaker_client: "SageMakerClient" + sagemaker_client: Callable[[], "SageMakerClient"] env: str report: SagemakerSourceReport # config filter for specific job types to ingest (see metadata-ingestion README) @@ -170,8 +171,7 @@ class JobProcessor: def get_jobs(self, job_type: JobType, job_spec: JobInfo) -> List[Any]: jobs = [] - - paginator = self.sagemaker_client.get_paginator(job_spec.list_command) + paginator = self.sagemaker_client().get_paginator(job_spec.list_command) for page in paginator.paginate(): page_jobs: List[Any] = page[job_spec.list_key] @@ -269,7 +269,7 @@ def get_job_details(self, job_name: str, job_type: JobType) -> Dict[str, Any]: describe_command = job_type_to_info[job_type].describe_command describe_name_key = job_type_to_info[job_type].describe_name_key - return getattr(self.sagemaker_client, describe_command)( + return getattr(self.sagemaker_client(), describe_command)( **{describe_name_key: job_name} ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index 7a96b2f0643ab0..f37f5358f9e17d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -4,7 +4,6 @@ import os 
from typing import Iterable, List, Optional -from datahub.emitter.mce_builder import make_dataset_urn from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SupportStatus, @@ -21,15 +20,12 @@ TestConnectionReport, ) from datahub.ingestion.api.workunit import MetadataWorkUnit -from datahub.ingestion.source.bigquery_v2.bigquery_audit import ( - BigqueryTableIdentifier, - BigQueryTableRef, -) +from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier from datahub.ingestion.source.bigquery_v2.bigquery_config import BigQueryV2Config from datahub.ingestion.source.bigquery_v2.bigquery_report import BigQueryV2Report from datahub.ingestion.source.bigquery_v2.bigquery_schema import ( - BigqueryProject, BigQuerySchemaApi, + get_projects, ) from datahub.ingestion.source.bigquery_v2.bigquery_schema_gen import ( BigQuerySchemaGenerator, @@ -37,6 +33,10 @@ from datahub.ingestion.source.bigquery_v2.bigquery_test_connection import ( BigQueryTestConnection, ) +from datahub.ingestion.source.bigquery_v2.common import ( + BigQueryFilter, + BigQueryIdentifierBuilder, +) from datahub.ingestion.source.bigquery_v2.lineage import BigqueryLineageExtractor from datahub.ingestion.source.bigquery_v2.profiler import BigqueryProfiler from datahub.ingestion.source.bigquery_v2.usage import BigQueryUsageExtractor @@ -109,12 +109,11 @@ def __init__(self, ctx: PipelineContext, config: BigQueryV2Config): BigqueryTableIdentifier._BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX = ( self.config.sharded_table_pattern ) - if self.config.enable_legacy_sharded_table_support: - BigqueryTableIdentifier._BQ_SHARDED_TABLE_SUFFIX = "" self.bigquery_data_dictionary = BigQuerySchemaApi( - self.report.schema_api_perf, - self.config.get_bigquery_client(), + report=BigQueryV2Report().schema_api_perf, + projects_client=config.get_projects_client(), + client=config.get_bigquery_client(), ) if self.config.extract_policy_tags_from_catalog: 
self.bigquery_data_dictionary.datacatalog_client = ( @@ -122,6 +121,8 @@ def __init__(self, ctx: PipelineContext, config: BigQueryV2Config): ) self.sql_parser_schema_resolver = self._init_schema_resolver() + self.filters = BigQueryFilter(self.config, self.report) + self.identifiers = BigQueryIdentifierBuilder(self.config, self.report) redundant_lineage_run_skip_handler: Optional[ RedundantLineageRunSkipHandler @@ -138,7 +139,7 @@ def __init__(self, ctx: PipelineContext, config: BigQueryV2Config): self.lineage_extractor = BigqueryLineageExtractor( config, self.report, - dataset_urn_builder=self.gen_dataset_urn_from_raw_ref, + identifiers=self.identifiers, redundant_run_skip_handler=redundant_lineage_run_skip_handler, ) @@ -155,7 +156,7 @@ def __init__(self, ctx: PipelineContext, config: BigQueryV2Config): config, self.report, schema_resolver=self.sql_parser_schema_resolver, - dataset_urn_builder=self.gen_dataset_urn_from_raw_ref, + identifiers=self.identifiers, redundant_run_skip_handler=redundant_usage_run_skip_handler, ) @@ -178,7 +179,7 @@ def __init__(self, ctx: PipelineContext, config: BigQueryV2Config): self.domain_registry, self.sql_parser_schema_resolver, self.profiler, - self.gen_dataset_urn, + self.identifiers, ) self.add_config_to_report() @@ -231,7 +232,11 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: ] def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: - projects = self._get_projects() + projects = get_projects( + self.bq_schema_extractor.schema_api, + self.report, + self.filters, + ) if not projects: return @@ -255,66 +260,6 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: self.bq_schema_extractor.table_refs, ) - def _get_projects(self) -> List[BigqueryProject]: - logger.info("Getting projects") - if self.config.project_ids or self.config.project_id: - project_ids = self.config.project_ids or [self.config.project_id] # type: ignore - return [ - BigqueryProject(id=project_id, 
name=project_id) - for project_id in project_ids - ] - else: - return list(self._query_project_list()) - - def _query_project_list(self) -> Iterable[BigqueryProject]: - try: - projects = self.bigquery_data_dictionary.get_projects() - - if ( - not projects - ): # Report failure on exception and if empty list is returned - self.report.failure( - title="Get projects didn't return any project. ", - message="Maybe resourcemanager.projects.get permission is missing for the service account. " - "You can assign predefined roles/bigquery.metadataViewer role to your service account.", - ) - except Exception as e: - self.report.failure( - title="Failed to get BigQuery Projects", - message="Maybe resourcemanager.projects.get permission is missing for the service account. " - "You can assign predefined roles/bigquery.metadataViewer role to your service account.", - exc=e, - ) - projects = [] - - for project in projects: - if self.config.project_id_pattern.allowed(project.id): - yield project - else: - self.report.report_dropped(project.id) - - def gen_dataset_urn( - self, project_id: str, dataset_name: str, table: str, use_raw_name: bool = False - ) -> str: - datahub_dataset_name = BigqueryTableIdentifier(project_id, dataset_name, table) - return make_dataset_urn( - self.platform, - ( - str(datahub_dataset_name) - if not use_raw_name - else datahub_dataset_name.raw_table_name() - ), - self.config.env, - ) - - def gen_dataset_urn_from_raw_ref(self, ref: BigQueryTableRef) -> str: - return self.gen_dataset_urn( - ref.table_identifier.project_id, - ref.table_identifier.dataset, - ref.table_identifier.table, - use_raw_name=True, - ) - def get_report(self) -> BigQueryV2Report: return self.report diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py index bc00517567bbd4..319c838d2658ad 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py @@ -7,7 +7,6 @@ from dateutil import parser -from datahub.emitter.mce_builder import make_dataset_urn from datahub.utilities.parsing_util import ( get_first_missing_key, get_first_missing_key_any, @@ -213,13 +212,6 @@ def get_sanitized_table_ref(self) -> "BigQueryTableRef": BigqueryTableIdentifier.from_string_name(sanitized_table) ) - def to_urn(self, env: str) -> str: - return make_dataset_urn( - "bigquery", - f"{self.table_identifier.project_id}.{self.table_identifier.dataset}.{self.table_identifier.table}", - env, - ) - def __str__(self) -> str: return f"projects/{self.table_identifier.project_id}/datasets/{self.table_identifier.dataset}/tables/{self.table_identifier.table}" @@ -294,19 +286,27 @@ def from_entry( job.get("jobName", {}).get("jobId"), ), project_id=job.get("jobName", {}).get("projectId"), - default_dataset=job_query_conf["defaultDataset"] - if job_query_conf["defaultDataset"] - else None, - start_time=parser.parse(job["jobStatistics"]["startTime"]) - if job["jobStatistics"]["startTime"] - else None, - end_time=parser.parse(job["jobStatistics"]["endTime"]) - if job["jobStatistics"]["endTime"] - else None, - numAffectedRows=int(job["jobStatistics"]["queryOutputRowCount"]) - if "queryOutputRowCount" in job["jobStatistics"] - and job["jobStatistics"]["queryOutputRowCount"] - else None, + default_dataset=( + job_query_conf["defaultDataset"] + if job_query_conf["defaultDataset"] + else None + ), + start_time=( + parser.parse(job["jobStatistics"]["startTime"]) + if job["jobStatistics"]["startTime"] + else None + ), + end_time=( + parser.parse(job["jobStatistics"]["endTime"]) + if job["jobStatistics"]["endTime"] + else None + ), + numAffectedRows=( + int(job["jobStatistics"]["queryOutputRowCount"]) + if "queryOutputRowCount" in job["jobStatistics"] + and 
job["jobStatistics"]["queryOutputRowCount"] + else None + ), statementType=job_query_conf.get("statementType", "UNKNOWN"), ) # destinationTable @@ -376,18 +376,26 @@ def from_exported_bigquery_audit_metadata( query=query_config["query"], job_name=job["jobName"], project_id=QueryEvent._get_project_id_from_job_name(job["jobName"]), - default_dataset=query_config["defaultDataset"] - if query_config.get("defaultDataset") - else None, - start_time=parser.parse(job["jobStats"]["startTime"]) - if job["jobStats"]["startTime"] - else None, - end_time=parser.parse(job["jobStats"]["endTime"]) - if job["jobStats"]["endTime"] - else None, - numAffectedRows=int(query_stats["outputRowCount"]) - if query_stats.get("outputRowCount") - else None, + default_dataset=( + query_config["defaultDataset"] + if query_config.get("defaultDataset") + else None + ), + start_time=( + parser.parse(job["jobStats"]["startTime"]) + if job["jobStats"]["startTime"] + else None + ), + end_time=( + parser.parse(job["jobStats"]["endTime"]) + if job["jobStats"]["endTime"] + else None + ), + numAffectedRows=( + int(query_stats["outputRowCount"]) + if query_stats.get("outputRowCount") + else None + ), statementType=query_config.get("statementType", "UNKNOWN"), ) # jobName @@ -445,18 +453,26 @@ def from_entry_v2( timestamp=row.timestamp, actor_email=payload["authenticationInfo"]["principalEmail"], query=query_config["query"], - default_dataset=query_config["defaultDataset"] - if "defaultDataset" in query_config and query_config["defaultDataset"] - else None, - start_time=parser.parse(job["jobStats"]["startTime"]) - if job["jobStats"]["startTime"] - else None, - end_time=parser.parse(job["jobStats"]["endTime"]) - if job["jobStats"]["endTime"] - else None, - numAffectedRows=int(query_stats["outputRowCount"]) - if "outputRowCount" in query_stats and query_stats["outputRowCount"] - else None, + default_dataset=( + query_config["defaultDataset"] + if "defaultDataset" in query_config and 
query_config["defaultDataset"] + else None + ), + start_time=( + parser.parse(job["jobStats"]["startTime"]) + if job["jobStats"]["startTime"] + else None + ), + end_time=( + parser.parse(job["jobStats"]["endTime"]) + if job["jobStats"]["endTime"] + else None + ), + numAffectedRows=( + int(query_stats["outputRowCount"]) + if "outputRowCount" in query_stats and query_stats["outputRowCount"] + else None + ), statementType=query_config.get("statementType", "UNKNOWN"), ) query_event.job_name = job.get("jobName") diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py index fe961dbd780f6f..c5a8b2ab7fbe33 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py @@ -1,26 +1,33 @@ +import json import logging import os +import re +import tempfile from datetime import timedelta from typing import Any, Dict, List, Optional, Union -from google.cloud import bigquery, datacatalog_v1 +from google.cloud import bigquery, datacatalog_v1, resourcemanager_v3 from google.cloud.logging_v2.client import Client as GCPLoggingClient from pydantic import Field, PositiveInt, PrivateAttr, root_validator, validator from datahub.configuration.common import AllowDenyPattern, ConfigModel +from datahub.configuration.source_common import ( + EnvConfigMixin, + LowerCaseDatasetUrnConfigMixin, + PlatformInstanceConfigMixin, +) from datahub.configuration.validate_field_removal import pydantic_removed_field +from datahub.configuration.validate_multiline_string import pydantic_multiline_string from datahub.ingestion.glossary.classification_mixin import ( ClassificationSourceConfigMixin, ) -from datahub.ingestion.source.sql.sql_config import SQLCommonConfig +from datahub.ingestion.source.sql.sql_config import SQLCommonConfig, SQLFilterConfig from 
datahub.ingestion.source.state.stateful_ingestion_base import ( StatefulLineageConfigMixin, StatefulProfilingConfigMixin, StatefulUsageConfigMixin, ) from datahub.ingestion.source.usage.usage_common import BaseUsageConfig -from datahub.ingestion.source_config.bigquery import BigQueryBaseConfig -from datahub.ingestion.source_config.usage.bigquery_usage import BigQueryCredential logger = logging.getLogger(__name__) @@ -28,19 +35,117 @@ os.getenv("DATAHUB_BIGQUERY_SCHEMA_PARALLELISM", 20) ) +# Regexp for sharded tables. +# A sharded table is a table that has a suffix of the form _yyyymmdd or yyyymmdd, where yyyymmdd is a date. +# The regexp checks for valid dates in the suffix (e.g. 20200101, 20200229, 20201231) and if the date is not valid +# then it is not a sharded table. +_BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX: str = ( + "((.+\\D)[_$]?)?(\\d\\d\\d\\d(?:0[1-9]|1[0-2])(?:0[1-9]|[12][0-9]|3[01]))$" +) + + +class BigQueryBaseConfig(ConfigModel): + rate_limit: bool = Field( + default=False, description="Should we rate limit requests made to API." + ) + requests_per_min: int = Field( + default=60, + description="Used to control number of API calls made per min. Only used when `rate_limit` is set to `True`.", + ) + + temp_table_dataset_prefix: str = Field( + default="_", + description="If you are creating temp tables in a dataset with a particular prefix you can use this config to set the prefix for the dataset. This is to support workflows from before bigquery's introduction of temp tables. By default we use `_` because of datasets that begin with an underscore are hidden by default https://cloud.google.com/bigquery/docs/datasets#dataset-naming.", + ) + + sharded_table_pattern: str = Field( + deprecated=True, + default=_BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX, + description="The regex pattern to match sharded tables and group as one table. 
This is a very low level config parameter, only change if you know what you are doing, ", + ) + + @validator("sharded_table_pattern") + def sharded_table_pattern_is_a_valid_regexp(cls, v): + try: + re.compile(v) + except Exception as e: + raise ValueError( + "sharded_table_pattern configuration pattern is invalid." + ) from e + return v + + @root_validator(pre=True, skip_on_failure=True) + def project_id_backward_compatibility_configs_set(cls, values: Dict) -> Dict: + project_id = values.pop("project_id", None) + project_ids = values.get("project_ids") + + if not project_ids and project_id: + values["project_ids"] = [project_id] + elif project_ids and project_id: + logging.warning( + "Please use `project_ids` config. Config `project_id` will be ignored." + ) + return values + class BigQueryUsageConfig(BaseUsageConfig): _query_log_delay_removed = pydantic_removed_field("query_log_delay") max_query_duration: timedelta = Field( default=timedelta(minutes=15), - description="Correction to pad start_time and end_time with. For handling the case where the read happens within our time range but the query completion event is delayed and happens after the configured end time.", + description="Correction to pad start_time and end_time with. For handling the case where the read happens " + "within our time range but the query completion event is delayed and happens after the configured" + " end time.", ) apply_view_usage_to_tables: bool = Field( default=False, - description="Whether to apply view's usage to its base tables. If set to False, uses sql parser and applies usage to views / tables mentioned in the query. If set to True, usage is applied to base tables only.", + description="Whether to apply view's usage to its base tables. If set to False, uses sql parser and applies " + "usage to views / tables mentioned in the query. 
If set to True, usage is applied to base tables " + "only.", + ) + + +class BigQueryCredential(ConfigModel): + project_id: str = Field(description="Project id to set the credentials") + private_key_id: str = Field(description="Private key id") + private_key: str = Field( + description="Private key in a form of '-----BEGIN PRIVATE KEY-----\\nprivate-key\\n-----END PRIVATE KEY-----\\n'" + ) + client_email: str = Field(description="Client email") + client_id: str = Field(description="Client Id") + auth_uri: str = Field( + default="https://accounts.google.com/o/oauth2/auth", + description="Authentication uri", + ) + token_uri: str = Field( + default="https://oauth2.googleapis.com/token", description="Token uri" + ) + auth_provider_x509_cert_url: str = Field( + default="https://www.googleapis.com/oauth2/v1/certs", + description="Auth provider x509 certificate url", ) + type: str = Field(default="service_account", description="Authentication type") + client_x509_cert_url: Optional[str] = Field( + default=None, + description="If not set it will be default to https://www.googleapis.com/robot/v1/metadata/x509/client_email", + ) + + _fix_private_key_newlines = pydantic_multiline_string("private_key") + + @root_validator(skip_on_failure=True) + def validate_config(cls, values: Dict[str, Any]) -> Dict[str, Any]: + if values.get("client_x509_cert_url") is None: + values[ + "client_x509_cert_url" + ] = f'https://www.googleapis.com/robot/v1/metadata/x509/{values["client_email"]}' + return values + + def create_credential_temp_file(self) -> str: + with tempfile.NamedTemporaryFile(delete=False) as fp: + cred_json = json.dumps(self.dict(), indent=4, separators=(",", ": ")) + fp.write(cred_json.encode()) + return fp.name class BigQueryConnectionConfig(ConfigModel): @@ -74,6 +179,9 @@ def get_bigquery_client(self) -> bigquery.Client: client_options = self.extra_client_options return bigquery.Client(self.project_on_behalf, **client_options) + def get_projects_client(self) -> 
resourcemanager_v3.ProjectsClient: + return resourcemanager_v3.ProjectsClient() + def get_policy_tag_manager_client(self) -> datacatalog_v1.PolicyTagManagerClient: return datacatalog_v1.PolicyTagManagerClient() @@ -98,19 +206,139 @@ def get_sql_alchemy_url(self) -> str: return "bigquery://" +class BigQueryFilterConfig(SQLFilterConfig): + project_ids: List[str] = Field( + default_factory=list, + description=( + "Ingests specified project_ids. Use this property if you want to specify what projects to ingest or " + "don't want to give project resourcemanager.projects.list to your service account. " + "Overrides `project_id_pattern`." + ), + ) + project_labels: List[str] = Field( + default_factory=list, + description=( + "Ingests projects with the specified labels. Set value in the format of `key:value`. Use this property to " + "define which projects to ingest based" + "on project-level labels. If project_ids or project_id is set, this configuration has no effect. The " + "ingestion process filters projects by label first, and then applies the project_id_pattern." + ), + ) + + project_id_pattern: AllowDenyPattern = Field( + default=AllowDenyPattern.allow_all(), + description="Regex patterns for project_id to filter in ingestion.", + ) + + dataset_pattern: AllowDenyPattern = Field( + default=AllowDenyPattern.allow_all(), + description="Regex patterns for dataset to filter in ingestion. Specify regex to only match the schema name. " + "e.g. to match all tables in schema analytics, use the regex 'analytics'", + ) + + match_fully_qualified_names: bool = Field( + default=True, + description="[deprecated] Whether `dataset_pattern` is matched against fully qualified dataset name " + "`.`.", + ) + + table_snapshot_pattern: AllowDenyPattern = Field( + default=AllowDenyPattern.allow_all(), + description="Regex patterns for table snapshots to filter in ingestion. Specify regex to match the entire " + "snapshot name in database.schema.snapshot format. e.g. 
to match all snapshots starting with " + "customer in Customer database and public schema, use the regex 'Customer.public.customer.*'", + ) + + # NOTE: `schema_pattern` is added here only to hide it from docs. + schema_pattern: AllowDenyPattern = Field( + default=AllowDenyPattern.allow_all(), + hidden_from_docs=True, + ) + + @root_validator(pre=False, skip_on_failure=True) + def backward_compatibility_configs_set(cls, values: Dict) -> Dict: + dataset_pattern: Optional[AllowDenyPattern] = values.get("dataset_pattern") + schema_pattern = values.get("schema_pattern") + if ( + dataset_pattern == AllowDenyPattern.allow_all() + and schema_pattern != AllowDenyPattern.allow_all() + ): + logging.warning( + "dataset_pattern is not set but schema_pattern is set, using schema_pattern as dataset_pattern. " + "schema_pattern will be deprecated, please use dataset_pattern instead." + ) + values["dataset_pattern"] = schema_pattern + dataset_pattern = schema_pattern + elif ( + dataset_pattern != AllowDenyPattern.allow_all() + and schema_pattern != AllowDenyPattern.allow_all() + ): + logging.warning( + "schema_pattern will be ignored in favour of dataset_pattern. schema_pattern will be deprecated," + " please use dataset_pattern only." + ) + + match_fully_qualified_names = values.get("match_fully_qualified_names") + + if ( + dataset_pattern is not None + and dataset_pattern != AllowDenyPattern.allow_all() + and match_fully_qualified_names is not None + and not match_fully_qualified_names + ): + logger.warning( + "Please update `dataset_pattern` to match against fully qualified schema name " + "`.` and set config `match_fully_qualified_names : True`." + "The config option `match_fully_qualified_names` is deprecated and will be " + "removed in a future release." + ) + elif match_fully_qualified_names and dataset_pattern is not None: + adjusted = False + for lst in [dataset_pattern.allow, dataset_pattern.deny]: + for i, pattern in enumerate(lst): + if "." 
not in pattern: + if pattern.startswith("^"): + lst[i] = r"^.*\." + pattern[1:] + else: + lst[i] = r".*\." + pattern + adjusted = True + if adjusted: + logger.warning( + "`dataset_pattern` was adjusted to match against fully qualified schema names," + " of the form `.`." + ) + + return values + + +class BigQueryIdentifierConfig( + PlatformInstanceConfigMixin, EnvConfigMixin, LowerCaseDatasetUrnConfigMixin +): + include_data_platform_instance: bool = Field( + default=False, + description="Whether to create a DataPlatformInstance aspect, equal to the BigQuery project id." + " If enabled, will cause redundancy in the browse path for BigQuery entities in the UI," + " because the project id is represented as the top-level container.", + ) + + enable_legacy_sharded_table_support: bool = Field( + default=True, + description="Use the legacy sharded table urn suffix added.", + ) + + class BigQueryV2Config( BigQueryConnectionConfig, BigQueryBaseConfig, + BigQueryFilterConfig, + # BigQueryFilterConfig must come before (higher precedence) the SQLCommon config, so that the documentation overrides are applied. + BigQueryIdentifierConfig, SQLCommonConfig, StatefulUsageConfigMixin, StatefulLineageConfigMixin, StatefulProfilingConfigMixin, ClassificationSourceConfigMixin, ): - project_id_pattern: AllowDenyPattern = Field( - default=AllowDenyPattern.allow_all(), - description="Regex patterns for project_id to filter in ingestion.", - ) include_schema_metadata: bool = Field( default=True, @@ -141,37 +369,15 @@ class BigQueryV2Config( description="Capture BigQuery dataset labels as DataHub tag", ) - dataset_pattern: AllowDenyPattern = Field( - default=AllowDenyPattern.allow_all(), - description="Regex patterns for dataset to filter in ingestion. Specify regex to only match the schema name. e.g. 
to match all tables in schema analytics, use the regex 'analytics'", - ) - - match_fully_qualified_names: bool = Field( - default=True, - description="[deprecated] Whether `dataset_pattern` is matched against fully qualified dataset name `.`.", - ) - include_external_url: bool = Field( default=True, description="Whether to populate BigQuery Console url to Datasets/Tables", ) - include_data_platform_instance: bool = Field( - default=False, - description="Whether to create a DataPlatformInstance aspect, equal to the BigQuery project id." - " If enabled, will cause redundancy in the browse path for BigQuery entities in the UI," - " because the project id is represented as the top-level container.", - ) - include_table_snapshots: Optional[bool] = Field( default=True, description="Whether table snapshots should be ingested." ) - table_snapshot_pattern: AllowDenyPattern = Field( - default=AllowDenyPattern.allow_all(), - description="Regex patterns for table snapshots to filter in ingestion. Specify regex to match the entire snapshot name in database.schema.snapshot format. e.g. to match all snapshots starting with customer in Customer database and public schema, use the regex 'Customer.public.customer.*'", - ) - debug_include_full_payloads: bool = Field( default=False, description="Include full payload into events. It is only for debugging and internal use.", @@ -180,17 +386,22 @@ class BigQueryV2Config( number_of_datasets_process_in_batch: int = Field( hidden_from_docs=True, default=10000, - description="Number of table queried in batch when getting metadata. This is a low level config property which should be touched with care.", + description="Number of table queried in batch when getting metadata. This is a low level config property " + "which should be touched with care.", ) number_of_datasets_process_in_batch_if_profiling_enabled: int = Field( default=1000, - description="Number of partitioned table queried in batch when getting metadata. 
This is a low level config property which should be touched with care. This restriction is needed because we query partitions system view which throws error if we try to touch too many tables.", + description="Number of partitioned table queried in batch when getting metadata. This is a low level config " + "property which should be touched with care. This restriction is needed because we query " + "partitions system view which throws error if we try to touch too many tables.", ) use_tables_list_query_v2: bool = Field( default=False, - description="List tables using an improved query that extracts partitions and last modified timestamps more accurately. Requires the ability to read table data. Automatically enabled when profiling is enabled.", + description="List tables using an improved query that extracts partitions and last modified timestamps more " + "accurately. Requires the ability to read table data. Automatically enabled when profiling is " + "enabled.", ) @property @@ -199,23 +410,10 @@ def have_table_data_read_permission(self) -> bool: column_limit: int = Field( default=300, - description="Maximum number of columns to process in a table. This is a low level config property which should be touched with care. This restriction is needed because excessively wide tables can result in failure to ingest the schema.", + description="Maximum number of columns to process in a table. This is a low level config property which " + "should be touched with care. This restriction is needed because excessively wide tables can " + "result in failure to ingest the schema.", ) - # The inheritance hierarchy is wonky here, but these options need modifications. - project_id: Optional[str] = Field( - default=None, - description="[deprecated] Use project_id_pattern or project_ids instead.", - ) - project_ids: List[str] = Field( - default_factory=list, - description=( - "Ingests specified project_ids. 
Use this property if you want to specify what projects to ingest or " - "don't want to give project resourcemanager.projects.list to your service account. " - "Overrides `project_id_pattern`." - ), - ) - - storage_project_id: None = Field(default=None, hidden_from_docs=True) lineage_use_sql_parser: bool = Field( default=True, @@ -242,11 +440,6 @@ def have_table_data_read_permission(self) -> bool: description="This flag enables the data lineage extraction from Data Lineage API exposed by Google Data Catalog. NOTE: This extractor can't build views lineage. It's recommended to enable the view's DDL parsing. Read the docs to have more information about: https://cloud.google.com/data-catalog/docs/concepts/about-data-lineage", ) - enable_legacy_sharded_table_support: bool = Field( - default=True, - description="Use the legacy sharded table urn suffix added.", - ) - extract_policy_tags_from_catalog: bool = Field( default=False, description=( @@ -342,70 +535,6 @@ def validate_bigquery_audit_metadata_datasets( return v - @root_validator(pre=False, skip_on_failure=True) - def backward_compatibility_configs_set(cls, values: Dict) -> Dict: - project_id = values.get("project_id") - project_id_pattern = values.get("project_id_pattern") - - if project_id_pattern == AllowDenyPattern.allow_all() and project_id: - logging.warning( - "project_id_pattern is not set but project_id is set, source will only ingest the project_id project. project_id will be deprecated, please use project_id_pattern instead." - ) - values["project_id_pattern"] = AllowDenyPattern(allow=[f"^{project_id}$"]) - elif project_id_pattern != AllowDenyPattern.allow_all() and project_id: - logging.warning( - "use project_id_pattern whenever possible. project_id will be deprecated, please use project_id_pattern only if possible." 
- ) - - dataset_pattern: Optional[AllowDenyPattern] = values.get("dataset_pattern") - schema_pattern = values.get("schema_pattern") - if ( - dataset_pattern == AllowDenyPattern.allow_all() - and schema_pattern != AllowDenyPattern.allow_all() - ): - logging.warning( - "dataset_pattern is not set but schema_pattern is set, using schema_pattern as dataset_pattern. schema_pattern will be deprecated, please use dataset_pattern instead." - ) - values["dataset_pattern"] = schema_pattern - dataset_pattern = schema_pattern - elif ( - dataset_pattern != AllowDenyPattern.allow_all() - and schema_pattern != AllowDenyPattern.allow_all() - ): - logging.warning( - "schema_pattern will be ignored in favour of dataset_pattern. schema_pattern will be deprecated, please use dataset_pattern only." - ) - - match_fully_qualified_names = values.get("match_fully_qualified_names") - - if ( - dataset_pattern is not None - and dataset_pattern != AllowDenyPattern.allow_all() - and match_fully_qualified_names is not None - and not match_fully_qualified_names - ): - logger.warning( - "Please update `dataset_pattern` to match against fully qualified schema name `.` and set config `match_fully_qualified_names : True`." - "The config option `match_fully_qualified_names` is deprecated and will be removed in a future release." - ) - elif match_fully_qualified_names and dataset_pattern is not None: - adjusted = False - for lst in [dataset_pattern.allow, dataset_pattern.deny]: - for i, pattern in enumerate(lst): - if "." not in pattern: - if pattern.startswith("^"): - lst[i] = r"^.*\." + pattern[1:] - else: - lst[i] = r".*\." + pattern - adjusted = True - if adjusted: - logger.warning( - "`dataset_pattern` was adjusted to match against fully qualified schema names," - " of the form `.`." 
- ) - - return values - def get_table_pattern(self, pattern: List[str]) -> str: return "|".join(pattern) if pattern else "" diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_queries.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_queries.py new file mode 100644 index 00000000000000..fffb5cfc8abfdf --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_queries.py @@ -0,0 +1,90 @@ +import logging +from dataclasses import dataclass, field +from typing import Iterable, Optional + +from pydantic import Field +from typing_extensions import Self + +from datahub.configuration.time_window_config import BaseTimeWindowConfig +from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.api.source import Source, SourceReport +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.source.bigquery_v2.bigquery_config import ( + BigQueryConnectionConfig, + BigQueryFilterConfig, + BigQueryIdentifierConfig, +) +from datahub.ingestion.source.bigquery_v2.bigquery_report import ( + BigQuerySchemaApiPerfReport, +) +from datahub.ingestion.source.bigquery_v2.bigquery_schema import BigQuerySchemaApi +from datahub.ingestion.source.bigquery_v2.common import ( + BigQueryFilter, + BigQueryIdentifierBuilder, +) +from datahub.ingestion.source.bigquery_v2.queries_extractor import ( + BigQueryQueriesExtractor, + BigQueryQueriesExtractorConfig, + BigQueryQueriesExtractorReport, +) + +logger = logging.getLogger(__name__) + + +@dataclass +class BigQueryQueriesSourceReport(SourceReport): + window: Optional[BaseTimeWindowConfig] = None + queries_extractor: Optional[BigQueryQueriesExtractorReport] = None + schema_api_perf: BigQuerySchemaApiPerfReport = field( + default_factory=BigQuerySchemaApiPerfReport + ) + + +class BigQueryQueriesSourceConfig( + BigQueryQueriesExtractorConfig, BigQueryFilterConfig, BigQueryIdentifierConfig +): + connection: 
BigQueryConnectionConfig = Field( + default_factory=BigQueryConnectionConfig + ) + + +class BigQueryQueriesSource(Source): + def __init__(self, ctx: PipelineContext, config: BigQueryQueriesSourceConfig): + self.ctx = ctx + self.config = config + self.report = BigQueryQueriesSourceReport() + + self.filters = BigQueryFilter(self.config, self.report) + self.identifiers = BigQueryIdentifierBuilder(self.config, self.report) + + self.connection = self.config.connection.get_bigquery_client() + + self.queries_extractor = BigQueryQueriesExtractor( + connection=self.connection, + schema_api=BigQuerySchemaApi( + self.report.schema_api_perf, + self.connection, + projects_client=self.config.connection.get_projects_client(), + ), + config=self.config, + structured_report=self.report, + filters=self.filters, + identifiers=self.identifiers, + graph=self.ctx.graph, + ) + self.report.queries_extractor = self.queries_extractor.report + + @classmethod + def create(cls, config_dict: dict, ctx: PipelineContext) -> Self: + config = BigQueryQueriesSourceConfig.parse_obj(config_dict) + return cls(ctx, config) + + def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: + self.report.window = self.config.window + + # TODO: Disable auto status processor? 
+ # TODO: Don't emit lineage, usage, operations for ghost entities + return self.queries_extractor.get_workunits_internal() + + def get_report(self) -> BigQueryQueriesSourceReport: + return self.report diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py index 4cfcc3922ddc3d..d68468fd56c9bc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py @@ -11,6 +11,7 @@ from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport from datahub.ingestion.source_report.time_window import BaseTimeWindowReport +from datahub.sql_parsing.sql_parsing_aggregator import SqlAggregatorReport from datahub.utilities.lossy_collections import LossyDict, LossyList from datahub.utilities.perf_timer import PerfTimer from datahub.utilities.stats_collections import TopKDict, int_top_k_dict @@ -30,8 +31,9 @@ class BigQuerySchemaApiPerfReport(Report): num_get_views_for_dataset_api_requests: int = 0 num_get_snapshots_for_dataset_api_requests: int = 0 - list_projects: PerfTimer = field(default_factory=PerfTimer) - list_datasets: PerfTimer = field(default_factory=PerfTimer) + list_projects_timer: PerfTimer = field(default_factory=PerfTimer) + list_projects_with_labels_timer: PerfTimer = field(default_factory=PerfTimer) + list_datasets_timer: PerfTimer = field(default_factory=PerfTimer) get_columns_for_dataset_sec: float = 0 get_tables_for_dataset_sec: float = 0 @@ -169,5 +171,8 @@ class BigQueryV2Report( usage_end_time: Optional[datetime] = None stateful_usage_ingestion_enabled: bool = False + # lineage/usage v2 + sql_aggregator: Optional[SqlAggregatorReport] = None + def set_ingestion_stage(self, project_id: str, stage: str) -> None: 
self.report_ingestion_stage_start(f"{project_id}: {stage}") diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py index 3b6cca59d8289b..6301c389815861 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py @@ -5,8 +5,8 @@ from typing import Any, Dict, Iterable, Iterator, List, Optional from google.api_core import retry -from google.cloud import bigquery, datacatalog_v1 from google.cloud.bigquery import retry as bq_retry +from google.cloud import bigquery, datacatalog_v1, resourcemanager_v3 from google.cloud.bigquery.table import ( RowIterator, TableListItem, @@ -14,12 +14,14 @@ TimePartitioningType, ) +from datahub.ingestion.api.source import SourceReport from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier from datahub.ingestion.source.bigquery_v2.bigquery_helper import parse_labels from datahub.ingestion.source.bigquery_v2.bigquery_report import ( BigQuerySchemaApiPerfReport, BigQueryV2Report, ) +from datahub.ingestion.source.bigquery_v2.common import BigQueryFilter from datahub.ingestion.source.bigquery_v2.queries import ( BigqueryQuery, BigqueryTableType, @@ -145,9 +147,11 @@ def __init__( self, report: BigQuerySchemaApiPerfReport, client: bigquery.Client, + projects_client: resourcemanager_v3.ProjectsClient, datacatalog_client: Optional[datacatalog_v1.PolicyTagManagerClient] = None, ) -> None: self.bq_client = client + self.projects_client = projects_client self.report = report self.datacatalog_client = datacatalog_client @@ -181,7 +185,7 @@ def _should_retry(exc: BaseException) -> bool: page_token = None projects: List[BigqueryProject] = [] - with self.report.list_projects: + with self.report.list_projects_timer: while True: try: self.report.num_list_projects_api_requests += 1 @@ -191,7 
+195,7 @@ def _should_retry(exc: BaseException) -> bool: # 'Quota exceeded: Your user exceeded quota for concurrent project.lists requests.' # Hence, added the api request retry of 15 min. # We already tried adding rate_limit externally, proving max_result and page_size - # to restrict the request calls inside list_project but issue still occured. + # to restrict the request calls inside list_project but issue still occurred. projects_iterator = self.bq_client.list_projects( max_results=max_results_per_page, page_token=page_token, @@ -218,10 +222,30 @@ def _should_retry(exc: BaseException) -> bool: return [] return projects + def get_projects_with_labels(self, labels: List[str]) -> List[BigqueryProject]: + with self.report.list_projects_with_labels_timer: + try: + projects = [] + labels_query = " OR ".join([f"labels.{label}" for label in labels]) + for project in self.projects_client.search_projects(query=labels_query): + projects.append( + BigqueryProject( + id=project.project_id, name=project.display_name + ) + ) + + return projects + + except Exception as e: + logger.error( + f"Error getting projects with labels: {labels}. {e}", exc_info=True + ) + return [] + def get_datasets_for_project_id( self, project_id: str, maxResults: Optional[int] = None ) -> List[BigqueryDataset]: - with self.report.list_datasets: + with self.report.list_datasets_timer: self.report.num_list_datasets_api_requests += 1 datasets = self.bq_client.list_datasets(project_id, max_results=maxResults) return [ @@ -595,3 +619,76 @@ def _make_bigquery_table_snapshot(snapshot: bigquery.Row) -> BigqueryTableSnapsh table=snapshot.base_table_name, ), ) + + +def query_project_list( + schema_api: BigQuerySchemaApi, + report: SourceReport, + filters: BigQueryFilter, +) -> Iterable[BigqueryProject]: + try: + projects = schema_api.get_projects() + + if not projects: # Report failure on exception and if empty list is returned + report.failure( + title="Get projects didn't return any project. 
", + message="Maybe resourcemanager.projects.get permission is missing for the service account. " + "You can assign predefined roles/bigquery.metadataViewer role to your service account.", + ) + except Exception as e: + report.failure( + title="Failed to get BigQuery Projects", + message="Maybe resourcemanager.projects.get permission is missing for the service account. " + "You can assign predefined roles/bigquery.metadataViewer role to your service account.", + exc=e, + ) + projects = [] + + for project in projects: + if filters.filter_config.project_id_pattern.allowed(project.id): + yield project + else: + logger.debug( + f"Ignoring project {project.id} as it's not allowed by project_id_pattern" + ) + + +def get_projects( + schema_api: BigQuerySchemaApi, + report: SourceReport, + filters: BigQueryFilter, +) -> List[BigqueryProject]: + logger.info("Getting projects") + if filters.filter_config.project_ids: + return [ + BigqueryProject(id=project_id, name=project_id) + for project_id in filters.filter_config.project_ids + ] + elif filters.filter_config.project_labels: + return list(query_project_list_from_labels(schema_api, report, filters)) + else: + return list(query_project_list(schema_api, report, filters)) + + +def query_project_list_from_labels( + schema_api: BigQuerySchemaApi, + report: SourceReport, + filters: BigQueryFilter, +) -> Iterable[BigqueryProject]: + projects = schema_api.get_projects_with_labels(filters.filter_config.project_labels) + + if not projects: # Report failure on exception and if empty list is returned + report.report_failure( + "metadata-extraction", + "Get projects didn't return any project with any of the specified label(s). " + "Maybe resourcemanager.projects.list permission is missing for the service account. 
" + "You can assign predefined roles/bigquery.metadataViewer role to your service account.", + ) + + for project in projects: + if filters.filter_config.project_id_pattern.allowed(project.id): + yield project + else: + logger.debug( + f"Ignoring project {project.id} as it's not allowed by project_id_pattern" + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py index c6a50a1c977f4e..8bcdc5a4a675d4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py @@ -1,16 +1,12 @@ import logging import re from collections import defaultdict -from typing import Callable, Dict, Iterable, List, Optional, Set, Type, Union, cast +from typing import Dict, Iterable, List, Optional, Set, Type, Union, cast from google.cloud.bigquery.table import TableListItem from datahub.configuration.pattern_utils import is_schema_allowed, is_tag_allowed -from datahub.emitter.mce_builder import ( - make_data_platform_urn, - make_dataplatform_instance_urn, - make_tag_urn, -) +from datahub.emitter.mce_builder import make_tag_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_builder import BigQueryDatasetKey, ContainerKey, ProjectIdKey from datahub.ingestion.api.workunit import MetadataWorkUnit @@ -41,6 +37,7 @@ from datahub.ingestion.source.bigquery_v2.common import ( BQ_EXTERNAL_DATASET_URL_TEMPLATE, BQ_EXTERNAL_TABLE_URL_TEMPLATE, + BigQueryIdentifierBuilder, ) from datahub.ingestion.source.bigquery_v2.profiler import BigqueryProfiler from datahub.ingestion.source.common.subtypes import ( @@ -160,16 +157,15 @@ def __init__( domain_registry: Optional[DomainRegistry], sql_parser_schema_resolver: SchemaResolver, profiler: BigqueryProfiler, - dataset_urn_builder: Callable[[str, str, str], str], + 
identifiers: BigQueryIdentifierBuilder, ): self.config = config self.report = report - self.bigquery_data_dictionary = bigquery_data_dictionary + self.schema_api = bigquery_data_dictionary self.domain_registry = domain_registry self.sql_parser_schema_resolver = sql_parser_schema_resolver self.profiler = profiler - self.gen_dataset_urn = dataset_urn_builder - self.platform: str = "bigquery" + self.identifiers = identifiers self.classification_handler = ClassificationHandler(self.config, self.report) self.data_reader: Optional[BigQueryDataReader] = None @@ -205,12 +201,8 @@ def get_dataplatform_instance_aspect( self, dataset_urn: str, project_id: str ) -> MetadataWorkUnit: aspect = DataPlatformInstanceClass( - platform=make_data_platform_urn(self.platform), - instance=( - make_dataplatform_instance_urn(self.platform, project_id) - if self.config.include_data_platform_instance - else None - ), + platform=self.identifiers.make_data_platform_urn(), + instance=self.identifiers.make_dataplatform_instance_urn(project_id), ) return MetadataChangeProposalWrapper( entityUrn=dataset_urn, aspect=aspect @@ -220,7 +212,7 @@ def gen_dataset_key(self, db_name: str, schema: str) -> ContainerKey: return BigQueryDatasetKey( project_id=db_name, dataset_id=schema, - platform=self.platform, + platform=self.identifiers.platform, env=self.config.env, backcompat_env_as_instance=True, ) @@ -228,7 +220,7 @@ def gen_dataset_key(self, db_name: str, schema: str) -> ContainerKey: def gen_project_id_key(self, database: str) -> ContainerKey: return ProjectIdKey( project_id=database, - platform=self.platform, + platform=self.identifiers.platform, env=self.config.env, backcompat_env_as_instance=True, ) @@ -285,14 +277,12 @@ def _process_project( project_id = bigquery_project.id try: - bigquery_project.datasets = ( - self.bigquery_data_dictionary.get_datasets_for_project_id(project_id) + bigquery_project.datasets = self.schema_api.get_datasets_for_project_id( + project_id ) except Exception as e: - if 
( - self.config.project_id or self.config.project_ids - ) and "not enabled BigQuery." in str(e): + if self.config.project_ids and "not enabled BigQuery." in str(e): action_mesage = ( "The project has not enabled BigQuery API. " "Did you mistype project id in recipe ?" @@ -419,7 +409,7 @@ def _process_schema( or self.config.include_views or self.config.include_table_snapshots ): - columns = self.bigquery_data_dictionary.get_columns_for_dataset( + columns = self.schema_api.get_columns_for_dataset( project_id=project_id, dataset_name=dataset_name, column_limit=self.config.column_limit, @@ -459,9 +449,7 @@ def _process_schema( ) elif self.store_table_refs: # Need table_refs to calculate lineage and usage - for table_item in self.bigquery_data_dictionary.list_tables( - dataset_name, project_id - ): + for table_item in self.schema_api.list_tables(dataset_name, project_id): identifier = BigqueryTableIdentifier( project_id=project_id, dataset=dataset_name, @@ -481,7 +469,7 @@ def _process_schema( if self.config.include_views: db_views[dataset_name] = list( - self.bigquery_data_dictionary.get_views_for_dataset( + self.schema_api.get_views_for_dataset( project_id, dataset_name, self.config.is_profiling_enabled(), @@ -500,7 +488,7 @@ def _process_schema( if self.config.include_table_snapshots: db_snapshots[dataset_name] = list( - self.bigquery_data_dictionary.get_snapshots_for_dataset( + self.schema_api.get_snapshots_for_dataset( project_id, dataset_name, self.config.is_profiling_enabled(), @@ -747,7 +735,9 @@ def gen_view_dataset_workunits( viewLogic=view_definition_string or "", ) yield MetadataChangeProposalWrapper( - entityUrn=self.gen_dataset_urn(project_id, dataset_name, table.name), + entityUrn=self.identifiers.gen_dataset_urn( + project_id, dataset_name, table.name + ), aspect=view_properties_aspect, ).as_workunit() @@ -786,7 +776,9 @@ def gen_dataset_workunits( tags_to_add: Optional[List[str]] = None, custom_properties: Optional[Dict[str, str]] = None, ) -> 
Iterable[MetadataWorkUnit]: - dataset_urn = self.gen_dataset_urn(project_id, dataset_name, table.name) + dataset_urn = self.identifiers.gen_dataset_urn( + project_id, dataset_name, table.name + ) status = Status(removed=False) yield MetadataChangeProposalWrapper( @@ -954,7 +946,7 @@ def gen_schema_metadata( ) -> MetadataWorkUnit: schema_metadata = SchemaMetadata( schemaName=str(dataset_name), - platform=make_data_platform_urn(self.platform), + platform=self.identifiers.make_data_platform_urn(), version=0, hash="", platformSchema=MySqlDDL(tableSchema=""), @@ -999,7 +991,7 @@ def get_tables_for_dataset( for table_item in table_items: items_to_get[table_item] = table_items[table_item] if len(items_to_get) % max_batch_size == 0: - yield from self.bigquery_data_dictionary.get_tables_for_dataset( + yield from self.schema_api.get_tables_for_dataset( project_id, dataset_name, items_to_get, @@ -1009,7 +1001,7 @@ def get_tables_for_dataset( items_to_get.clear() if items_to_get: - yield from self.bigquery_data_dictionary.get_tables_for_dataset( + yield from self.schema_api.get_tables_for_dataset( project_id, dataset_name, items_to_get, @@ -1028,9 +1020,7 @@ def get_core_table_details( # Dict to store sharded table and the last seen max shard id sharded_tables: Dict[str, TableListItem] = {} - for table in self.bigquery_data_dictionary.list_tables( - dataset_name, project_id - ): + for table in self.schema_api.list_tables(dataset_name, project_id): table_identifier = BigqueryTableIdentifier( project_id=project_id, dataset=dataset_name, diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py index 3aac78c154b2ee..d0f111f451c0e1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py @@ -12,6 +12,7 @@ from 
datahub.ingestion.source.bigquery_v2.bigquery_config import BigQueryV2Config from datahub.ingestion.source.bigquery_v2.bigquery_report import BigQueryV2Report from datahub.ingestion.source.bigquery_v2.bigquery_schema import BigQuerySchemaApi +from datahub.ingestion.source.bigquery_v2.common import BigQueryIdentifierBuilder from datahub.ingestion.source.bigquery_v2.lineage import BigqueryLineageExtractor from datahub.ingestion.source.bigquery_v2.usage import BigQueryUsageExtractor from datahub.sql_parsing.schema_resolver import SchemaResolver @@ -96,7 +97,9 @@ def metadata_read_capability_test( client: bigquery.Client = config.get_bigquery_client() assert client bigquery_data_dictionary = BigQuerySchemaApi( - BigQueryV2Report().schema_api_perf, client + report=BigQueryV2Report().schema_api_perf, + projects_client=config.get_projects_client(), + client=client, ) result = bigquery_data_dictionary.get_datasets_for_project_id( project_id, 10 @@ -134,7 +137,7 @@ def lineage_capability_test( report: BigQueryV2Report, ) -> CapabilityReport: lineage_extractor = BigqueryLineageExtractor( - connection_conf, report, lambda ref: "" + connection_conf, report, BigQueryIdentifierBuilder(connection_conf, report) ) for project_id in project_ids: try: @@ -158,7 +161,7 @@ def usage_capability_test( connection_conf, report, schema_resolver=SchemaResolver(platform="bigquery"), - dataset_urn_builder=lambda ref: "", + identifiers=BigQueryIdentifierBuilder(connection_conf, report), ) for project_id in project_ids: try: diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/common.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/common.py index e38ab07855b8be..5d2358fb8d05ea 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/common.py @@ -1,5 +1,103 @@ +from typing import Optional + +from datahub.configuration.common import AllowDenyPattern +from 
datahub.configuration.pattern_utils import is_schema_allowed +from datahub.emitter.mce_builder import ( + make_data_platform_urn, + make_dataplatform_instance_urn, + make_dataset_urn, + make_user_urn, +) +from datahub.ingestion.api.source import SourceReport +from datahub.ingestion.source.bigquery_v2.bigquery_audit import ( + BigqueryTableIdentifier, + BigQueryTableRef, +) +from datahub.ingestion.source.bigquery_v2.bigquery_config import ( + BigQueryFilterConfig, + BigQueryIdentifierConfig, +) + BQ_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ" BQ_DATE_SHARD_FORMAT = "%Y%m%d" BQ_EXTERNAL_TABLE_URL_TEMPLATE = "https://console.cloud.google.com/bigquery?project={project}&ws=!1m5!1m4!4m3!1s{project}!2s{dataset}!3s{table}" BQ_EXTERNAL_DATASET_URL_TEMPLATE = "https://console.cloud.google.com/bigquery?project={project}&ws=!1m4!1m3!3m2!1s{project}!2s{dataset}" + +BQ_SYSTEM_TABLES_PATTERN = [r".*\.INFORMATION_SCHEMA\..*", r".*\.__TABLES__.*"] + + +class BigQueryIdentifierBuilder: + platform = "bigquery" + + def __init__( + self, + identifier_config: BigQueryIdentifierConfig, + structured_reporter: SourceReport, + ) -> None: + self.identifier_config = identifier_config + if self.identifier_config.enable_legacy_sharded_table_support: + BigqueryTableIdentifier._BQ_SHARDED_TABLE_SUFFIX = "" + self.structured_reporter = structured_reporter + + def gen_dataset_urn( + self, project_id: str, dataset_name: str, table: str, use_raw_name: bool = False + ) -> str: + datahub_dataset_name = BigqueryTableIdentifier(project_id, dataset_name, table) + return make_dataset_urn( + self.platform, + ( + str(datahub_dataset_name) + if not use_raw_name + else datahub_dataset_name.raw_table_name() + ), + self.identifier_config.env, + ) + + def gen_dataset_urn_from_raw_ref(self, ref: BigQueryTableRef) -> str: + return self.gen_dataset_urn( + ref.table_identifier.project_id, + ref.table_identifier.dataset, + ref.table_identifier.table, + use_raw_name=True, + ) + + def gen_user_urn(self, user_email: str) -> 
str: + return make_user_urn(user_email.split("@")[0]) + + def make_data_platform_urn(self) -> str: + return make_data_platform_urn(self.platform) + + def make_dataplatform_instance_urn(self, project_id: str) -> Optional[str]: + return ( + make_dataplatform_instance_urn(self.platform, project_id) + if self.identifier_config.include_data_platform_instance + else None + ) + + +class BigQueryFilter: + def __init__( + self, filter_config: BigQueryFilterConfig, structured_reporter: SourceReport + ) -> None: + self.filter_config = filter_config + self.structured_reporter = structured_reporter + + def is_allowed(self, table_id: BigqueryTableIdentifier) -> bool: + return AllowDenyPattern(deny=BQ_SYSTEM_TABLES_PATTERN).allowed( + str(table_id) + ) and ( + self.is_project_allowed(table_id.project_id) + and is_schema_allowed( + self.filter_config.dataset_pattern, + table_id.dataset, + table_id.project_id, + self.filter_config.match_fully_qualified_names, + ) + and self.filter_config.table_pattern.allowed(str(table_id)) + ) # TODO: use view_pattern ? 
+ + def is_project_allowed(self, project_id: str) -> bool: + if self.filter_config.project_ids: + return project_id in self.filter_config.project_ids + return self.filter_config.project_id_pattern.allowed(project_id) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py index 496bd64d3b4fe2..16d472d4dedd2a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py @@ -42,7 +42,10 @@ BigQuerySchemaApi, BigqueryTableSnapshot, ) -from datahub.ingestion.source.bigquery_v2.common import BQ_DATETIME_FORMAT +from datahub.ingestion.source.bigquery_v2.common import ( + BQ_DATETIME_FORMAT, + BigQueryIdentifierBuilder, +) from datahub.ingestion.source.bigquery_v2.queries import ( BQ_FILTER_RULE_TEMPLATE_V2_LINEAGE, bigquery_audit_metadata_query_template_lineage, @@ -225,12 +228,12 @@ def __init__( self, config: BigQueryV2Config, report: BigQueryV2Report, - dataset_urn_builder: Callable[[BigQueryTableRef], str], + identifiers: BigQueryIdentifierBuilder, redundant_run_skip_handler: Optional[RedundantLineageRunSkipHandler] = None, ): self.config = config self.report = report - self.dataset_urn_builder = dataset_urn_builder + self.identifiers = identifiers self.audit_log_api = BigQueryAuditLogApi( report.audit_log_api_perf, self.config.rate_limit, @@ -427,7 +430,7 @@ def populate_snapshot_lineage( def gen_lineage_workunits_for_table( self, lineage: Dict[str, Set[LineageEdge]], table_ref: BigQueryTableRef ) -> Iterable[MetadataWorkUnit]: - dataset_urn = self.dataset_urn_builder(table_ref) + dataset_urn = self.identifiers.gen_dataset_urn_from_raw_ref(table_ref) lineage_info = self.get_lineage_for_table( bq_table=table_ref, @@ -479,7 +482,9 @@ def lineage_via_catalog_lineage_api( lineage_client: lineage_v1.LineageClient = lineage_v1.LineageClient() data_dictionary = 
BigQuerySchemaApi( - self.report.schema_api_perf, self.config.get_bigquery_client() + self.report.schema_api_perf, + self.config.get_bigquery_client(), + self.config.get_projects_client(), ) # Filtering datasets @@ -870,7 +875,9 @@ def get_lineage_for_table( # even if the lineage is same but the order is different. for upstream in sorted(self.get_upstream_tables(bq_table, lineage_metadata)): upstream_table = BigQueryTableRef.from_string_name(upstream.table) - upstream_table_urn = self.dataset_urn_builder(upstream_table) + upstream_table_urn = self.identifiers.gen_dataset_urn_from_raw_ref( + upstream_table + ) # Generate table-level lineage. upstream_table_class = UpstreamClass( diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries_extractor.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries_extractor.py new file mode 100644 index 00000000000000..8457f4e37b3d26 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries_extractor.py @@ -0,0 +1,458 @@ +import functools +import logging +import pathlib +import tempfile +from dataclasses import dataclass, field +from datetime import datetime, timezone +from typing import Dict, Iterable, List, Optional, TypedDict + +from google.cloud.bigquery import Client +from pydantic import Field + +from datahub.configuration.common import AllowDenyPattern +from datahub.configuration.time_window_config import ( + BaseTimeWindowConfig, + get_time_bucket, +) +from datahub.ingestion.api.report import Report +from datahub.ingestion.api.source import SourceReport +from datahub.ingestion.api.source_helpers import auto_workunit +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.graph.client import DataHubGraph +from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier +from datahub.ingestion.source.bigquery_v2.bigquery_config import BigQueryBaseConfig +from 
datahub.ingestion.source.bigquery_v2.bigquery_schema import ( + BigqueryProject, + BigQuerySchemaApi, + get_projects, +) +from datahub.ingestion.source.bigquery_v2.common import ( + BQ_DATETIME_FORMAT, + BigQueryFilter, + BigQueryIdentifierBuilder, +) +from datahub.ingestion.source.usage.usage_common import BaseUsageConfig +from datahub.sql_parsing.schema_resolver import SchemaResolver +from datahub.sql_parsing.sql_parsing_aggregator import ( + ObservedQuery, + SqlAggregatorReport, + SqlParsingAggregator, +) +from datahub.sql_parsing.sqlglot_utils import get_query_fingerprint +from datahub.utilities.file_backed_collections import ( + ConnectionWrapper, + FileBackedDict, + FileBackedList, +) +from datahub.utilities.perf_timer import PerfTimer +from datahub.utilities.stats_collections import TopKDict, int_top_k_dict +from datahub.utilities.time import datetime_to_ts_millis + +logger = logging.getLogger(__name__) + + +class BigQueryTableReference(TypedDict): + project_id: str + dataset_id: str + table_id: str + + +class DMLJobStatistics(TypedDict): + inserted_row_count: int + deleted_row_count: int + updated_row_count: int + + +class BigQueryJob(TypedDict): + job_id: str + project_id: str + creation_time: datetime + user_email: str + query: str + session_id: Optional[str] + query_hash: Optional[str] + + statement_type: str + destination_table: Optional[BigQueryTableReference] + referenced_tables: List[BigQueryTableReference] + # NOTE: This does not capture referenced_view unlike GCP Logging Event + + +class BigQueryQueriesExtractorConfig(BigQueryBaseConfig): + # TODO: Support stateful ingestion for the time windows. + window: BaseTimeWindowConfig = BaseTimeWindowConfig() + + local_temp_path: Optional[pathlib.Path] = Field( + default=None, + description="Local path to store the audit log.", + # TODO: For now, this is simply an advanced config to make local testing easier. + # Eventually, we will want to store date-specific files in the directory and use it as a cache. 
+ hidden_from_docs=True, + ) + + user_email_pattern: AllowDenyPattern = Field( + default=AllowDenyPattern.allow_all(), + description="regex patterns for user emails to filter in usage.", + ) + + include_lineage: bool = True + include_queries: bool = True + include_usage_statistics: bool = True + include_query_usage_statistics: bool = False + include_operations: bool = True + + region_qualifiers: List[str] = Field( + default=["region-us", "region-eu"], + description="BigQuery regions to be scanned for bigquery jobs. " + "See [this](https://cloud.google.com/bigquery/docs/information-schema-jobs#scope_and_syntax) for details.", + ) + + +@dataclass +class BigQueryQueriesExtractorReport(Report): + query_log_fetch_timer: PerfTimer = field(default_factory=PerfTimer) + audit_log_preprocessing_timer: PerfTimer = field(default_factory=PerfTimer) + audit_log_load_timer: PerfTimer = field(default_factory=PerfTimer) + sql_aggregator: Optional[SqlAggregatorReport] = None + num_queries_by_project: TopKDict[str, int] = field(default_factory=int_top_k_dict) + + num_total_queries: int = 0 + num_unique_queries: int = 0 + + +class BigQueryQueriesExtractor: + """ + Extracts query audit log and generates usage/lineage/operation workunits. + + Some notable differences in this wrt older usage extraction method are: + 1. For every lineage/operation workunit, corresponding query id is also present + 2. Operation aspect for a particular query is emitted at max once(last occurence) for a day + 3. 
"DROP" operation accounts for usage here + + """ + + def __init__( + self, + connection: Client, + schema_api: BigQuerySchemaApi, + config: BigQueryQueriesExtractorConfig, + structured_report: SourceReport, + filters: BigQueryFilter, + identifiers: BigQueryIdentifierBuilder, + graph: Optional[DataHubGraph] = None, + schema_resolver: Optional[SchemaResolver] = None, + discovered_tables: Optional[List[str]] = None, + ): + self.connection = connection + + self.config = config + self.filters = filters + self.identifiers = identifiers + self.schema_api = schema_api + self.report = BigQueryQueriesExtractorReport() + # self.filters = filters + self.discovered_tables = discovered_tables + + self.structured_report = structured_report + + self.aggregator = SqlParsingAggregator( + platform=self.identifiers.platform, + platform_instance=self.identifiers.identifier_config.platform_instance, + env=self.identifiers.identifier_config.env, + schema_resolver=schema_resolver, + graph=graph, + eager_graph_load=False, + generate_lineage=self.config.include_lineage, + generate_queries=self.config.include_queries, + generate_usage_statistics=self.config.include_usage_statistics, + generate_query_usage_statistics=self.config.include_query_usage_statistics, + usage_config=BaseUsageConfig( + bucket_duration=self.config.window.bucket_duration, + start_time=self.config.window.start_time, + end_time=self.config.window.end_time, + user_email_pattern=self.config.user_email_pattern, + ), + generate_operations=self.config.include_operations, + is_temp_table=self.is_temp_table, + is_allowed_table=self.is_allowed_table, + format_queries=False, + ) + self.report.sql_aggregator = self.aggregator.report + + @functools.cached_property + def local_temp_path(self) -> pathlib.Path: + if self.config.local_temp_path: + assert self.config.local_temp_path.is_dir() + return self.config.local_temp_path + + path = pathlib.Path(tempfile.mkdtemp()) + path.mkdir(parents=True, exist_ok=True) + logger.info(f"Using 
def is_allowed_table(self, name: str) -> bool:
    """Decide whether a table referenced by a query should be processed.

    A table is rejected when a non-empty list of discovered tables exists and
    the table is not in it; otherwise the decision is delegated to the
    configured filters. Any error raised along the way is logged and treated
    as "not allowed".

    Args:
        name: Table name as a string understood by
            `BigqueryTableIdentifier.from_string_name`.

    Returns:
        True if the table passes both checks, False otherwise.
    """
    try:
        identifier = BigqueryTableIdentifier.from_string_name(name)
        known_tables = self.discovered_tables
        if known_tables and str(identifier) not in known_tables:
            allowed = False
        else:
            allowed = self.filters.is_allowed(identifier)
    except Exception:
        logger.warning(f"Error parsing table name {name} ")
        allowed = False
    return allowed
def deduplicate_queries(
    self, queries: FileBackedList[ObservedQuery]
) -> FileBackedDict[Dict[int, ObservedQuery]]:
    """Collapse repeated queries into one entry per fingerprint and time bucket.

    Fingerprinting here avoids re-parsing the same SQL many times once the
    queries are handed to the aggregator. A consequence is that an operation
    (e.g. INSERT) is reported once per bucket even if it ran several times.

    Args:
        queries: Observed queries in the order they were fetched.

    Returns:
        Mapping of query fingerprint -> {bucket start in epoch millis -> query}.
        Queries without a timestamp are grouped under bucket 0.
    """
    deduped: FileBackedDict[Dict[int, ObservedQuery]] = FileBackedDict()

    for count, entry in enumerate(queries):
        if count > 0 and count % 10000 == 0:
            logger.info(f"Preprocessing completed for {count} query log entries")

        bucket_ts = (
            datetime_to_ts_millis(
                get_time_bucket(entry.timestamp, self.config.window.bucket_duration)
            )
            if entry.timestamp
            else 0
        )

        # The hash reported by BigQuery is not always present, so a fingerprint
        # computed from the query text is used instead.
        entry.query_hash = get_query_fingerprint(
            entry.query, self.identifiers.platform, fast=True
        )

        per_bucket = deduped.setdefault(entry.query_hash, {})
        first_seen = per_bucket.setdefault(bucket_ts, entry)
        if first_seen is entry:
            # First occurrence in this bucket: nothing to merge.
            continue

        # Repeat occurrence: count it and keep the most recently seen timestamp.
        # NOTE(review): this mutates the stored value in place; confirm that
        # FileBackedDict persists such in-place changes.
        first_seen.usage_multiplier += 1
        first_seen.timestamp = entry.timestamp

    return deduped
def _parse_audit_log_row(self, row: BigQueryJob) -> ObservedQuery:
    """Convert one row of the jobs query into an `ObservedQuery`.

    Args:
        row: A job record carrying the columns selected by the query-log query.

    Returns:
        The observed query, timestamped with the job creation time in UTC.
    """
    # Normalize the creation time to UTC. Previously this value was computed
    # but the raw row value was passed on, leaving the normalization unused.
    timestamp: datetime = row["creation_time"]
    timestamp = timestamp.astimezone(timezone.utc)

    # Usually BigQuery identifiers are referred to as <dataset>.<table>, and only
    # temporary tables are referred to as <table> alone, without project or dataset name.
    # Note that temporary tables can also be referenced using _SESSION.<table>
    # More details here - https://cloud.google.com/bigquery/docs/multi-statement-queries
    # Also _ at start considers this as temp dataset as per `temp_table_dataset_prefix` config
    TEMP_TABLE_QUALIFIER = "_SESSION"

    query = _extract_query_text(row)

    entry = ObservedQuery(
        query=query,
        session_id=row["session_id"],
        timestamp=timestamp,
        user=(
            self.identifiers.gen_user_urn(row["user_email"])
            if row["user_email"]
            else None
        ),
        default_db=row["project_id"],
        # Unqualified table names resolve against the session (temp) dataset.
        default_schema=TEMP_TABLE_QUALIFIER,
        query_hash=row["query_hash"],
        extra_info={
            "job_id": row["job_id"],
            "statement_type": row["statement_type"],
            "destination_table": row["destination_table"],
            "referenced_tables": row["referenced_tables"],
        },
    )

    return entry
+ # NOTE This will result in showing modified query instead of original query in DataHub UI + # Alternatively, this support needs to be added more natively in aggregator.add_observed_query + if ( + row["statement_type"] == "SELECT" + and row["destination_table"] + and not row["destination_table"]["table_id"].startswith("anon") + ): + table_name = BigqueryTableIdentifier( + row["destination_table"]["project_id"], + row["destination_table"]["dataset_id"], + row["destination_table"]["table_id"], + ).raw_table_name() + query = f"""CREATE TABLE `{table_name}` AS + ( + {row["query"]} + )""" + else: + query = row["query"] + return query + + +def _build_enriched_query_log_query( + project_id: str, + region: str, + start_time: datetime, + end_time: datetime, +) -> str: + + audit_start_time = start_time.strftime(BQ_DATETIME_FORMAT) + audit_end_time = end_time.strftime(BQ_DATETIME_FORMAT) + + # List of all statement types + # https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/BigQueryAuditMetadata.QueryStatementType + UNSUPPORTED_STATEMENT_TYPES = [ + # procedure + "CREATE_PROCEDURE", + "DROP_PROCEDURE", + "CALL", + "SCRIPT", # individual statements in executed procedure are present as separate jobs + # schema + "CREATE_SCHEMA", + "DROP_SCHEMA", + # function + "CREATE_FUNCTION", + "CREATE_TABLE_FUNCTION", + "DROP_FUNCTION", + # policies + "CREATE_ROW_ACCESS_POLICY", + "DROP_ROW_ACCESS_POLICY", + ] + + unsupported_statement_types = ",".join( + [f"'{statement_type}'" for statement_type in UNSUPPORTED_STATEMENT_TYPES] + ) + + # NOTE the use of partition column creation_time as timestamp here. + # Currently, only required columns are fetched. There are more columns such as + # total_slot_ms, job_type, total_bytes_billed, dml_statistics(inserted_row_count, etc) + # that may be fetched as required in future. 
Refer below link for list of all columns + # https://cloud.google.com/bigquery/docs/information-schema-jobs#schema + return f"""\ + SELECT + job_id, + project_id, + creation_time, + user_email, + query, + session_info.session_id as session_id, + query_info.query_hashes.normalized_literals as query_hash, + statement_type, + destination_table, + referenced_tables + FROM + `{project_id}`.`{region}`.INFORMATION_SCHEMA.JOBS + WHERE + creation_time >= '{audit_start_time}' AND + creation_time <= '{audit_end_time}' AND + error_result is null AND + not CONTAINS_SUBSTR(query, '.INFORMATION_SCHEMA.') AND + statement_type not in ({unsupported_statement_types}) + ORDER BY creation_time + """ diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py index 6824d630a2277a..876ffab85ba311 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py @@ -26,7 +26,6 @@ BaseTimeWindowConfig, get_time_bucket, ) -from datahub.emitter.mce_builder import make_user_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.closeable import Closeable from datahub.ingestion.api.source_helpers import auto_empty_dataset_usage_statistics @@ -44,7 +43,10 @@ ) from datahub.ingestion.source.bigquery_v2.bigquery_config import BigQueryV2Config from datahub.ingestion.source.bigquery_v2.bigquery_report import BigQueryV2Report -from datahub.ingestion.source.bigquery_v2.common import BQ_DATETIME_FORMAT +from datahub.ingestion.source.bigquery_v2.common import ( + BQ_DATETIME_FORMAT, + BigQueryIdentifierBuilder, +) from datahub.ingestion.source.bigquery_v2.queries import ( BQ_FILTER_RULE_TEMPLATE_V2_USAGE, bigquery_audit_metadata_query_template_usage, @@ -313,13 +315,13 @@ def __init__( report: BigQueryV2Report, *, schema_resolver: SchemaResolver, - dataset_urn_builder: 
Callable[[BigQueryTableRef], str], + identifiers: BigQueryIdentifierBuilder, redundant_run_skip_handler: Optional[RedundantUsageRunSkipHandler] = None, ): self.config: BigQueryV2Config = config self.report: BigQueryV2Report = report self.schema_resolver = schema_resolver - self.dataset_urn_builder = dataset_urn_builder + self.identifiers = identifiers # Replace hash of query with uuid if there are hash conflicts self.uuid_to_query: Dict[str, str] = {} @@ -404,7 +406,9 @@ def _get_workunits_internal( bucket_duration=self.config.bucket_duration, ), dataset_urns={ - self.dataset_urn_builder(BigQueryTableRef.from_string_name(ref)) + self.identifiers.gen_dataset_urn_from_raw_ref( + BigQueryTableRef.from_string_name(ref) + ) for ref in table_refs }, ) @@ -535,7 +539,7 @@ def _generate_usage_workunits( user_freq=entry.user_freq, column_freq=entry.column_freq, bucket_duration=self.config.bucket_duration, - resource_urn_builder=self.dataset_urn_builder, + resource_urn_builder=self.identifiers.gen_dataset_urn_from_raw_ref, top_n_queries=self.config.usage.top_n_queries, format_sql_queries=self.config.usage.format_sql_queries, queries_character_limit=self.config.usage.queries_character_limit, @@ -710,12 +714,14 @@ def _create_operation_workunit( affected_datasets = [] if event.query_event and event.query_event.referencedTables: for table in event.query_event.referencedTables: - affected_datasets.append(table.to_urn(self.config.env)) + affected_datasets.append( + self.identifiers.gen_dataset_urn_from_raw_ref(table) + ) operation_aspect = OperationClass( timestampMillis=reported_time, lastUpdatedTimestamp=operational_meta.last_updated_timestamp, - actor=make_user_urn(operational_meta.actor_email.split("@")[0]), + actor=self.identifiers.gen_user_urn(operational_meta.actor_email), operationType=operational_meta.statement_type, customOperationType=operational_meta.custom_type, affectedDatasets=affected_datasets, @@ -729,7 +735,7 @@ def _create_operation_workunit( 
class SortKey(ConfigModel):
    """Describes how partitions of a path spec should be ordered.

    `date_format` is written by the user in Java SimpleDateFormat notation and
    is converted to Python `strptime` directives when the model is validated.
    """

    key: str = Field(
        description="The key to sort on. This can be a compound key based on the path_spec variables."
    )
    # The description previously duplicated the `date_format` text; it now
    # describes the field it belongs to.
    type: SortKeyType = Field(
        default=SortKeyType.STRING,
        description="The type of the sort key. One of STRING, INTEGER, FLOAT, DATETIME or DATE.",
    )

    date_format: Optional[str] = Field(
        default=None,
        type=str,
        description="The date format to use when sorting. This is used to parse the date from the key. The format should follow the java [SimpleDateFormat](https://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html) format.",
    )

    @pydantic.validator("date_format", always=True)
    def convert_date_format_to_python_format(cls, v: Optional[str]) -> Optional[str]:
        """Translate Java date tokens (e.g. `yyyy`, `MM`) into `%`-directives.

        Returns None unchanged; any text that is not a known token is kept as is.
        """
        if v is None:
            return None

        # Tokens are replaced one after another in mapping order.
        for java_format, python_format in java_to_python_mapping.items():
            v = v.replace(java_format, f"%{python_format}")
        return v
def is_path_hidden(self, path: str) -> bool:
    """Report whether any component of `path` looks hidden.

    A component (the file name or any directory in front of it) counts as
    hidden when it starts with "." or "_".

    Args:
        path: The path to inspect.

    Returns:
        True if the file name or one of its parent directories is hidden.
    """
    hidden_prefixes = (".", "_")
    parent, leaf = os.path.split(path)

    if leaf.startswith(hidden_prefixes):
        return True

    return any(part.startswith(hidden_prefixes) for part in parent.split(os.sep))
ignore_ext=True) + path_slash = path.count("/") glob_slash = self.glob_include.count("/") if path_slash > glob_slash: @@ -126,13 +223,30 @@ def dir_allowed(self, path: str) -> bool: @classmethod def get_parsable_include(cls, include: str) -> str: parsable_include = include - for i in range(parsable_include.count("*")): - parsable_include = parsable_include.replace("*", f"{{folder[{i}]}}", 1) + if parsable_include.endswith("/{table}/**"): + # Remove the last two characters to make it parsable if it ends with {table}/** which marks autodetect partition + parsable_include = parsable_include[:-2] + else: + # Replace all * with {folder[i]} to make it parsable + for i in range(parsable_include.count("*")): + parsable_include = parsable_include.replace("*", f"{{folder[{i}]}}", 1) return parsable_include def get_named_vars(self, path: str) -> Union[None, parse.Result, parse.Match]: + if self.include.endswith("{table}/**"): + # If we have a partial path with ** at the end, we need to truncate the path to parse correctly + # parse needs to have exact number of folders to parse correctly and in case of ** we don't know the number of folders + # so we need to truncate the path to the last folder before ** to parse and get named vars correctly + splits = len(self.include[: self.include.find("{table}/")].split("/")) + path = "/".join(path.split("/", splits)[:-1]) + "/" + return self.compiled_include.parse(path) + def get_folder_named_vars( + self, path: str + ) -> Union[None, parse.Result, parse.Match]: + return self.compiled_folder_include.parse(path) + @pydantic.root_validator() def validate_no_double_stars(cls, values: Dict) -> Dict: if "include" not in values: @@ -227,6 +341,125 @@ def compiled_include(self): logger.debug(f"Setting compiled_include: {compiled_include}") return compiled_include + @cached_property + def compiled_folder_include(self): + parsable_folder_include = PathSpec.get_parsable_include(self.include).rsplit( + "/", 1 + )[0] + 
@cached_property
def extract_variable_names(self):
    """Names of the `{...}` placeholders that follow `{table}/` in the include.

    Returns:
        The placeholder names (without braces) in order of appearance, or an
        empty list when the include contains no `{table}/` marker.
    """
    segments = self.include.split("{table}/")
    if len(segments) < 2:
        # No "{table}/" in the include: indexing the part after it used to
        # raise IndexError; there are simply no partition variables.
        return []

    # Non-greedy match of every substring enclosed in {}.
    pattern = r"\{(.*?)\}"
    return re.findall(pattern, segments[1])
if the path is in the form of /value1/value2/value3 we infer it from the path and assign partition_0, partition_1, partition_2 etc + + partition_keys: List[Tuple[str, str]] = [] + if self.include.find("{table}/"): + named_vars = self.get_named_vars(path) + if named_vars: + # If user has specified partition_key and partition_value in the path_spec then we use it to get partition keys + if "partition_key" in named_vars.named and ( + ( + "partition_value" in named_vars.named + and len(named_vars.named["partition_key"]) + == len(named_vars.named["partition_value"]) + ) + or ( + "partition" in named_vars.named + and len(named_vars.named["partition_key"]) + == len(named_vars.named["partition"]) + ) + ): + for key in named_vars.named["partition_key"]: + # We need to support both partition_value and partition as both were in our docs + if ( + "partition_value" in named_vars + and key in named_vars.named["partition_value"] + ) or ( + "partition" in named_vars + and key in named_vars.named["partition"] + ): + partition_keys.append( + ( + named_vars.named["partition_key"][key], + named_vars.named["partition_value"][key] + if "partition_value" in named_vars.named + else named_vars.named["partition"][key], + ) + ) + return partition_keys + else: + # TODO: Fix this message + logger.debug( + "Partition key or value not found. 
Fallbacking another mechanism to get partition keys" + ) + + partition_vars = self.extract_variable_names + if partition_vars: + for partition_key in partition_vars: + pkey: str = partition_key + index: Optional[int] = None + # We need to recreate the key and index from the partition_key + if partition_key.find("[") != -1: + pkey, index = partition_key.strip("]").split("[") + else: + pkey = partition_key + index = None + + if pkey in named_vars.named: + if index and index in named_vars.named[pkey]: + partition_keys.append( + (f"{pkey}_{index}", named_vars.named[pkey][index]) + ) + else: + partition_keys.append( + (partition_key, named_vars.named[partition_key]) + ) + return partition_keys + + # If user did not specified partition_key and partition_value in the path_spec then we use the default mechanism to get partition keys + if len(self.include.split("{table}/")) == 2: + num_slash = len(self.include.split("{table}/")[0].split("/")) + partition = path.split("/", num_slash)[num_slash] + else: + return None + if partition.endswith("/"): + partition = partition[:-1] + + # If partition is in the form of key=value we infer it from the path + if partition.find("=") != -1: + partition = partition.rsplit("/", 1)[0] + for partition_key in partition.split("/"): + if partition_key.find("=") != -1: + partition_keys.append(tuple(partition_key.split("="))) + else: + partition_split = partition.rsplit("/", 1) + if len(partition_split) == 1: + return None + partition = partition_split[0] + # If partition is in the form of /value1/value2/value3 we infer it from the path and assign partition_0, partition_1, partition_2 etc + num = 0 + for partition_value in partition.split("/"): + partition_keys.append((f"partition_{num}", partition_value)) + num += 1 + return partition_keys + + return None + @cached_property def glob_include(self): glob_include = re.sub(r"\{[^}]+\}", "*", self.include) @@ -244,7 +477,20 @@ def validate_path_spec(cls, values: Dict) -> Dict[str, Any]: ) return 
# TODO: Add support to sort partition folders by the defined partition key pattern. This is not implemented yet.
def extract_datetime_partition(
    self, path: str, is_folder: bool = False
) -> Optional[datetime.datetime]:
    """Build the partition datetime of `path` from the configured sort key.

    The variables parsed from the path are substituted into the sort key
    template, and the result is parsed with the sort key's date format.

    Args:
        path: File or folder path to extract the partition datetime from.
        is_folder: Parse `path` with the folder pattern instead of the full
            include pattern.

    Returns:
        A UTC datetime, or None when no sort key or date format is
        configured or the path does not match the pattern.

    Raises:
        ValueError: If the substituted key does not match the date format.
    """
    if self.sort_key is None:
        return None

    if not self.sort_key.date_format and self.sort_key.type not in [
        SortKeyType.DATETIME,
        SortKeyType.DATE,
    ]:
        return None

    if is_folder:
        parsed_vars = self.get_folder_named_vars(path)
    else:
        parsed_vars = self.get_named_vars(path)
    if parsed_vars is None:
        return None

    partition_format = self.sort_key.key
    datetime_format = self.sort_key.date_format
    if datetime_format is None:
        return None

    for var_key in parsed_vars.named:
        var = parsed_vars.named[var_key]
        if isinstance(var, dict):
            # Indexed variables such as {name[0]} are stored as nested dicts.
            for key in var:
                template_key = var_key + f"[{key}]"
                partition_format = partition_format.replace(
                    f"{{{template_key}}}", var[key]
                )
        else:
            # str.replace returns a new string; the result must be kept,
            # otherwise plain {name} placeholders are never substituted.
            partition_format = partition_format.replace(f"{{{var_key}}}", var)
    return datetime.datetime.strptime(partition_format, datetime_format).replace(
        tzinfo=datetime.timezone.utc
    )
Tuple[str, str]: table_path = ( "/".join(path.split("/")[:depth]) + "/" + parsed_vars.named["table"] ) - return self._extract_table_name(parsed_vars.named), table_path + return self._extract_table_name(parsed_vars.named), table_path diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index e2b5f8378732c5..8d67551b9e1f2f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -276,6 +276,12 @@ class DBTCommonConfig( DBTEntitiesEnabled(), description="Controls for enabling / disabling metadata emission for different dbt entities (models, test definitions, test results, etc.)", ) + prefer_sql_parser_lineage: bool = Field( + default=False, + description="Normally we use dbt's metadata to generate table lineage. When enabled, we prefer results from the SQL parser when generating lineage instead. " + "This can be useful when dbt models reference tables directly, instead of using the ref() macro. " + "This requires that `skip_sources_in_lineage` is enabled.", + ) skip_sources_in_lineage: bool = Field( default=False, description="[Experimental] When enabled, dbt sources will not be included in the lineage graph. " @@ -366,13 +372,6 @@ class DBTCommonConfig( description="When enabled, includes the compiled code in the emitted metadata.", ) - prefer_sql_parser_lineage: bool = Field( - default=False, - description="Normally we use dbt's metadata to generate table lineage. When enabled, we prefer results from the SQL parser when generating lineage instead. " - "This can be useful when dbt models reference tables directly, instead of using the ref() macro. 
" - "This requires that `skip_sources_in_lineage` is enabled.", - ) - @validator("target_platform") def validate_target_platform_value(cls, target_platform: str) -> str: if target_platform.lower() == DBT_PLATFORM: @@ -438,15 +437,27 @@ def validate_include_column_lineage( return include_column_lineage - @validator("skip_sources_in_lineage") + @validator("skip_sources_in_lineage", always=True) def validate_skip_sources_in_lineage( cls, skip_sources_in_lineage: bool, values: Dict ) -> bool: - entites_enabled: Optional[DBTEntitiesEnabled] = values.get("entities_enabled") + entities_enabled: Optional[DBTEntitiesEnabled] = values.get("entities_enabled") + prefer_sql_parser_lineage: Optional[bool] = values.get( + "prefer_sql_parser_lineage" + ) + + if prefer_sql_parser_lineage and not skip_sources_in_lineage: + raise ValueError( + "`prefer_sql_parser_lineage` requires that `skip_sources_in_lineage` is enabled." + ) + if ( skip_sources_in_lineage - and entites_enabled - and entites_enabled.sources == EmitDirective.YES + and entities_enabled + and entities_enabled.sources == EmitDirective.YES + # When `prefer_sql_parser_lineage` is enabled, it's ok to have `skip_sources_in_lineage` enabled + # without also disabling sources. + and not prefer_sql_parser_lineage ): raise ValueError( "When `skip_sources_in_lineage` is enabled, `entities_enabled.sources` must be set to NO." @@ -454,16 +465,6 @@ def validate_skip_sources_in_lineage( return skip_sources_in_lineage - @validator("prefer_sql_parser_lineage") - def validate_prefer_sql_parser_lineage( - cls, prefer_sql_parser_lineage: bool, values: Dict - ) -> bool: - if prefer_sql_parser_lineage and not values.get("skip_sources_in_lineage"): - raise ValueError( - "`prefer_sql_parser_lineage` requires that `skip_sources_in_lineage` is enabled." 
- ) - return prefer_sql_parser_lineage - @dataclass class DBTColumn: @@ -769,23 +770,30 @@ def make_mapping_upstream_lineage( downstream_urn: str, node: DBTNode, convert_column_urns_to_lowercase: bool, + skip_sources_in_lineage: bool, ) -> UpstreamLineageClass: cll = [] - for column in node.columns or []: - field_name = column.name - if convert_column_urns_to_lowercase: - field_name = field_name.lower() - - cll.append( - FineGrainedLineage( - upstreamType=FineGrainedLineageUpstreamType.FIELD_SET, - upstreams=[mce_builder.make_schema_field_urn(upstream_urn, field_name)], - downstreamType=FineGrainedLineageDownstreamType.FIELD, - downstreams=[ - mce_builder.make_schema_field_urn(downstream_urn, field_name) - ], + if not (node.node_type == "source" and skip_sources_in_lineage): + # If `skip_sources_in_lineage` is enabled, we want to generate table lineage (for siblings) + # but not CLL. That's because CLL will make it look like the warehouse node has downstream + # column lineage, but it's really just empty. + for column in node.columns or []: + field_name = column.name + if convert_column_urns_to_lowercase: + field_name = field_name.lower() + + cll.append( + FineGrainedLineage( + upstreamType=FineGrainedLineageUpstreamType.FIELD_SET, + upstreams=[ + mce_builder.make_schema_field_urn(upstream_urn, field_name) + ], + downstreamType=FineGrainedLineageDownstreamType.FIELD, + downstreams=[ + mce_builder.make_schema_field_urn(downstream_urn, field_name) + ], + ) ) - ) return UpstreamLineageClass( upstreams=[ @@ -1476,6 +1484,7 @@ def create_target_platform_mces( downstream_urn=node_datahub_urn, node=node, convert_column_urns_to_lowercase=self.config.convert_column_urns_to_lowercase, + skip_sources_in_lineage=self.config.skip_sources_in_lineage, ) if self.config.incremental_lineage: # We only generate incremental lineage for non-dbt nodes. 
@@ -1821,6 +1830,7 @@ def _create_lineage_aspect_for_dbt_node( downstream_urn=node_urn, node=node, convert_column_urns_to_lowercase=self.config.convert_column_urns_to_lowercase, + skip_sources_in_lineage=self.config.skip_sources_in_lineage, ) else: upstream_urns = get_upstreams( diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py index bcf6d380a60fd0..bfae3060013d59 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py @@ -298,6 +298,16 @@ class LookerDashboardSourceConfig( description="When enabled, platform instance will be added in dashboard and chart urn.", ) + folder_path_pattern: AllowDenyPattern = Field( + default=AllowDenyPattern.allow_all(), + description="Allow or deny dashboards from specific folders. " + "For example: \n" + "deny: \n" + " - sales/deprecated \n" + "This pattern will deny the ingestion of all dashboards and looks within the sales/deprecated folder. 
\n" + "Dashboards will only be ingested if they're allowed by both this config and dashboard_pattern.", + ) + @validator("external_base_url", pre=True, always=True) def external_url_defaults_to_api_config_base_url( cls, v: Optional[str], *, values: Dict[str, Any], **kwargs: Dict[str, Any] diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_constant.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_constant.py index 21160cc97d4a62..920efeaa709e59 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_constant.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_constant.py @@ -1 +1,13 @@ IMPORTED_PROJECTS = "imported_projects" +DIMENSIONS = "dimensions" +MEASURES = "measures" +DIMENSION_GROUPS = "dimension_groups" +SQL_TABLE_NAME = "sql_table_name" +DATAHUB_TRANSFORMED_SQL_TABLE_NAME = "datahub_transformed_sql_table_name" +DERIVED_TABLE = "derived_table" +SQL = "sql" +DATAHUB_TRANSFORMED_SQL = "datahub_transformed_sql" +prod = "prod" +dev = "dev" +NAME = "name" +DERIVED_DOT_SQL = "derived.sql" diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py index fd670c23ad9cb0..52ebcdde06a279 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py @@ -1,17 +1,18 @@ import logging import pathlib from dataclasses import replace -from typing import Any, Dict, Optional +from typing import Dict, Optional from datahub.ingestion.source.looker.lkml_patched import load_lkml from datahub.ingestion.source.looker.looker_config import LookerConnectionDefinition from datahub.ingestion.source.looker.looker_dataclasses import LookerViewFile from datahub.ingestion.source.looker.looker_template_language import ( - resolve_liquid_variable_in_view_dict, + process_lookml_template_language, ) 
from datahub.ingestion.source.looker.lookml_config import ( _EXPLORE_FILE_EXTENSION, _VIEW_FILE_EXTENSION, + LookMLSourceConfig, LookMLSourceReport, ) @@ -29,13 +30,13 @@ def __init__( root_project_name: Optional[str], base_projects_folder: Dict[str, pathlib.Path], reporter: LookMLSourceReport, - liquid_variable: Dict[Any, Any], + source_config: LookMLSourceConfig, ) -> None: self.viewfile_cache: Dict[str, Optional[LookerViewFile]] = {} self._root_project_name = root_project_name self._base_projects_folder = base_projects_folder self.reporter = reporter - self.liquid_variable = liquid_variable + self.source_config = source_config def _load_viewfile( self, project_name: str, path: str, reporter: LookMLSourceReport @@ -73,9 +74,9 @@ def _load_viewfile( parsed = load_lkml(path) - resolve_liquid_variable_in_view_dict( - raw_view=parsed, - liquid_variable=self.liquid_variable, + process_lookml_template_language( + view_lkml_file_dict=parsed, + source_config=self.source_config, ) looker_viewfile = LookerViewFile.from_looker_dict( diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index ef329da930dda4..2d5250cfb74fa5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -1334,6 +1334,17 @@ def process_dashboard( looker_dashboard = self._get_looker_dashboard(dashboard_object) workunits = [] + if ( + looker_dashboard.folder_path is not None + and not self.source_config.folder_path_pattern.allowed( + looker_dashboard.folder_path + ) + ): + logger.debug( + f"Folder path {looker_dashboard.folder_path} is denied in folder_path_pattern" + ) + return [], None, dashboard_id, start_time, datetime.datetime.now() + if looker_dashboard.folder: workunits += list( self._get_folder_and_ancestors_workunits(looker_dashboard.folder) diff --git 
a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py index 99f83b5e922bad..04f9ec081ee680 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py @@ -1,15 +1,31 @@ import logging import re -from typing import Any, ClassVar, Dict, Set +from abc import ABC, abstractmethod +from typing import Any, ClassVar, Dict, List, Optional, Set +from deepmerge import always_merger from liquid import Undefined from liquid.exceptions import LiquidSyntaxError +from datahub.ingestion.source.looker.looker_constant import ( + DATAHUB_TRANSFORMED_SQL, + DATAHUB_TRANSFORMED_SQL_TABLE_NAME, + DERIVED_DOT_SQL, + DERIVED_TABLE, + NAME, + SQL, + SQL_TABLE_NAME, + dev, + prod, +) from datahub.ingestion.source.looker.looker_liquid_tag import ( CustomTagException, create_template, ) -from datahub.ingestion.source.looker.lookml_config import DERIVED_VIEW_PATTERN +from datahub.ingestion.source.looker.lookml_config import ( + DERIVED_VIEW_PATTERN, + LookMLSourceConfig, +) logger = logging.getLogger(__name__) @@ -92,52 +108,311 @@ def resolve_liquid_variable(text: str, liquid_variable: Dict[Any, Any]) -> str: return text -def _drop_derived_view_pattern(value: str) -> str: - # Drop ${ and } - return re.sub(DERIVED_VIEW_PATTERN, r"\1", value) +class LookMLViewTransformer(ABC): + """ + There are many transformations that we need to perform on the LookML view to make it suitable for metadata ingestion. + + These transformations include: + + 1. Evaluating Looker templates, such as `-- if prod --` comments. Example `LookMlIfCommentTransformer`. + + 2. Resolving Liquid templates. Example `LiquidVariableTransformer`. + + 3. Removing ${} from derived view patterns. Example `DropDerivedViewPatternTransformer`. + (e.g., changing ${view_name.SQL_TABLE_NAME} to 4. 
view_name.SQL_TABLE_NAME). + + 4. Completing incomplete SQL fragments. Example `IncompleteSqlTransformer`. + + Each transformer works on specific attributes of the LookML view. For example, the #4 transformation is only + applicable to the view.derived.sql attribute, while the other transformations apply to both the + view.sql_table_name and view.derived.sql attributes. + + This class contains the logic to ensure that the transformer is applied to specific attributes and returns a + dictionary containing the transformed data. + + For example: + In case of #1 and #2, it returns: + + **transformed derived_table:** + ``` + { + "derived_table": { + "datahub_transformed_sql": "" + } + } + ``` + + **Whereas original was:** + ``` + { + "derived_table": { + "sql": "" + } + } + ``` + + In case #3, it returns: + **transformed sql_table_name:** + ``` + { + "datahub_transformed_sql_table_name": "employee_income_source.SQL_TABLE_NAME" + } + ``` + + **Whereas original was:** + ``` + { + "sql_table_name": "${employee_income_source.SQL_TABLE_NAME}" + } + ``` + + In case #4, it returns: + **transformed derived_table:** + ``` + { + "derived_table": { + "datahub_transformed_sql": "SELECT column_a, column_b FROM foo" + } + } + ``` + + **Whereas original was:** + ``` + { + "derived_table": { + "sql": "column_a, column_b" + } + } + ``` + + Each transformation generates a section of the transformed dictionary with a new attribute named + `datahub_transformed_`. 
+ """ + + source_config: LookMLSourceConfig + + def __init__(self, source_config: LookMLSourceConfig): + self.source_config = source_config + + def transform(self, view: dict) -> dict: + value_to_transform: Optional[str] = None + + # is_attribute_supported check is required because not all transformer works on all attributes in current + # case mostly all transformer works on sql_table_name and derived.sql attributes, + # however IncompleteSqlTransformer only transform the derived.sql attribute + if SQL_TABLE_NAME in view and self.is_attribute_supported(SQL_TABLE_NAME): + # Give precedence to already processed transformed view.sql_table_name to apply more transformation + value_to_transform = view.get( + DATAHUB_TRANSFORMED_SQL_TABLE_NAME, view[SQL_TABLE_NAME] + ) + if ( + DERIVED_TABLE in view + and SQL in view[DERIVED_TABLE] + and self.is_attribute_supported(DERIVED_DOT_SQL) + ): + # Give precedence to already processed transformed view.derived.sql to apply more transformation + value_to_transform = view[DERIVED_TABLE].get( + DATAHUB_TRANSFORMED_SQL, view[DERIVED_TABLE][SQL] + ) -def _complete_incomplete_sql(raw_view: dict, sql: str) -> str: + if value_to_transform is None: + return {} - # Looker supports sql fragments that omit the SELECT and FROM parts of the query - # Add those in if we detect that it is missing - sql_query: str = sql + logger.debug(f"value to transform = {value_to_transform}") - if not re.search(r"SELECT\s", sql_query, flags=re.I): - # add a SELECT clause at the beginning - sql_query = f"SELECT {sql}" + transformed_value: str = self._apply_transformation( + value=value_to_transform, view=view + ) - if not re.search(r"FROM\s", sql_query, flags=re.I): - # add a FROM clause at the end - sql_query = f"{sql_query} FROM {raw_view['name']}" + logger.debug(f"transformed value = {transformed_value}") - return _drop_derived_view_pattern(sql_query) + if SQL_TABLE_NAME in view and value_to_transform: + return {DATAHUB_TRANSFORMED_SQL_TABLE_NAME: 
transformed_value} + if DERIVED_TABLE in view and SQL in view[DERIVED_TABLE] and value_to_transform: + return {DERIVED_TABLE: {DATAHUB_TRANSFORMED_SQL: transformed_value}} -def resolve_liquid_variable_in_view_dict( - raw_view: dict, liquid_variable: Dict[Any, Any] -) -> None: - if "views" not in raw_view: - return + return {} - for view in raw_view["views"]: - if "sql_table_name" in view: - view["datahub_transformed_sql_table_name"] = resolve_liquid_variable( - text=view["sql_table_name"], - liquid_variable=liquid_variable, - ) # keeping original sql_table_name as is to avoid any visualization issue later + @abstractmethod + def _apply_transformation(self, value: str, view: dict) -> str: + pass - view["datahub_transformed_sql_table_name"] = _drop_derived_view_pattern( - value=view["datahub_transformed_sql_table_name"] - ) + def is_attribute_supported(self, attribute: str) -> bool: + return attribute in [DERIVED_DOT_SQL, SQL_TABLE_NAME] + + +class LiquidVariableTransformer(LookMLViewTransformer): + """ + Replace the liquid variables with their values. + """ + + def _apply_transformation(self, value: str, view: dict) -> str: + return resolve_liquid_variable( + text=value, + liquid_variable=self.source_config.liquid_variable, + ) + + +class IncompleteSqlTransformer(LookMLViewTransformer): + """ + lookml view may contain the fragment of sql, however for lineage generation we need a complete sql. + IncompleteSqlTransformer will complete the view's derived.sql. 
+ """ + + def is_attribute_supported(self, attribute: str) -> bool: + return attribute in [DERIVED_DOT_SQL] - if "derived_table" in view and "sql" in view["derived_table"]: - # In sql we don't need to remove the extra spaces as sql parser takes care of extra spaces and \n - # while generating URN from sql - view["derived_table"]["datahub_transformed_sql"] = resolve_liquid_variable( - text=view["derived_table"]["sql"], liquid_variable=liquid_variable - ) # keeping original sql as is, so that on UI sql will be shown same is it is visible on looker portal + def _apply_transformation(self, value: str, view: dict) -> str: + if DERIVED_TABLE not in view or SQL not in view[DERIVED_TABLE]: + # This transformation is only applicable in-case of view contains view.derived.sql + return value - view["derived_table"]["datahub_transformed_sql"] = _complete_incomplete_sql( - raw_view=view, sql=view["derived_table"]["datahub_transformed_sql"] + # Looker supports sql fragments that omit the SELECT and FROM parts of the query + # Add those in if we detect that it is missing + sql_query: str = value + + if not re.search(r"SELECT\s", sql_query, flags=re.I): + # add a SELECT clause at the beginning + sql_query = f"SELECT {sql_query}" + + if not re.search(r"FROM\s", sql_query, flags=re.I): + # add a FROM clause at the end + sql_query = f"{sql_query} FROM {view[NAME]}" + + return sql_query + + +class DropDerivedViewPatternTransformer(LookMLViewTransformer): + """ + drop ${} from datahub_transformed_sql_table_name and view["derived_table"]["datahub_transformed_sql_table_name"] values. + + Example: transform ${employee_income_source.SQL_TABLE_NAME} to employee_income_source.SQL_TABLE_NAME + """ + + def _apply_transformation(self, value: str, view: dict) -> str: + return re.sub( + DERIVED_VIEW_PATTERN, + r"\1", + value, + ) + + +class LookMlIfCommentTransformer(LookMLViewTransformer): + """ + Evaluate the looker -- if -- comments. 
+ """ + + evaluate_to_true_regx: str + remove_if_comment_line_regx: str + + def __init__(self, source_config: LookMLSourceConfig): + super().__init__(source_config=source_config) + + # This regx will keep whatever after -- if looker_environment -- + self.evaluate_to_true_regx = r"-- if {} --".format( + self.source_config.looker_environment + ) + + # It will remove all other lines starts with -- if ... -- + self.remove_if_comment_line_regx = r"-- if {} --.*?(?=\n|-- if|$)".format( + dev if self.source_config.looker_environment.lower() == prod else prod + ) + + def _apply_regx(self, value: str) -> str: + result: str = re.sub( + self.remove_if_comment_line_regx, "", value, flags=re.IGNORECASE | re.DOTALL + ) + + # Remove '-- if prod --' but keep the rest of the line + result = re.sub(self.evaluate_to_true_regx, "", result, flags=re.IGNORECASE) + + return result + + def _apply_transformation(self, value: str, view: dict) -> str: + return self._apply_regx(value) + + +class TransformedLookMlView: + """ + TransformedLookMlView is collecting output of LookMLViewTransformer and creating a new transformed LookML view. + TransformedLookMlView creates a copy of the original view dictionary and updates the copy with the transformed output. + The deepmerge library is used because Python's dict.update function doesn't merge nested fields. + + The transformed LookML view will contain the following attributes: + + ``` + { + "derived_table": { + "sql": "" + }, + + dimensions ..... + } + ``` + see documentation of LookMLViewTransformer for output of each transformer. 
+ """ + + transformers: List[LookMLViewTransformer] + view_dict: dict + transformed_dict: dict + + def __init__( + self, + transformers: List[LookMLViewTransformer], + view_dict: dict, + ): + self.transformers = transformers + self.view_dict = view_dict + self.transformed_dict = {} + + def view(self) -> dict: + if self.transformed_dict: + return self.transformed_dict + + self.transformed_dict = {**self.view_dict} + + logger.debug(f"Processing view {self.view_dict[NAME]}") + + for transformer in self.transformers: + logger.debug(f"Applying transformer {transformer.__class__.__name__}") + + self.transformed_dict = always_merger.merge( + self.transformed_dict, transformer.transform(self.transformed_dict) ) + + return self.transformed_dict + + +def process_lookml_template_language( + source_config: LookMLSourceConfig, + view_lkml_file_dict: dict, +) -> None: + if "views" not in view_lkml_file_dict: + return + + transformers: List[LookMLViewTransformer] = [ + LookMlIfCommentTransformer( + source_config=source_config + ), # First evaluate the -- if -- comments. 
Looker does the same + LiquidVariableTransformer( + source_config=source_config + ), # Now resolve liquid variables + DropDerivedViewPatternTransformer( + source_config=source_config + ), # Remove any ${} symbol + IncompleteSqlTransformer( + source_config=source_config + ), # complete any incomplete sql + ] + + transformed_views: List[dict] = [] + + for view in view_lkml_file_dict["views"]: + transformed_views.append( + TransformedLookMlView(transformers=transformers, view_dict=view).view() + ) + + view_lkml_file_dict["views"] = transformed_views diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py index 7805b8b7b7d9a5..bf24f4b84679b1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py @@ -8,6 +8,11 @@ find_view_from_resolved_includes, ) from datahub.ingestion.source.looker.looker_config import LookerConnectionDefinition +from datahub.ingestion.source.looker.looker_constant import ( + DIMENSION_GROUPS, + DIMENSIONS, + MEASURES, +) from datahub.ingestion.source.looker.looker_dataclasses import LookerViewFile from datahub.ingestion.source.looker.looker_file_loader import LookerViewFileLoader from datahub.ingestion.source.looker.lookml_config import ( @@ -23,6 +28,39 @@ logger = logging.getLogger(__name__) +def merge_parent_and_child_fields( + child_fields: List[dict], parent_fields: List[dict] +) -> List[Dict]: + # Fetch the fields from the parent view, i.e., the view name mentioned in view.extends, and include those + # fields in child_fields. This inclusion will resolve the fields according to the precedence rules mentioned + # in the LookML documentation: https://cloud.google.com/looker/docs/reference/param-view-extends. 
+ + # Create a map field-name vs field + child_field_map: dict = {} + for field in child_fields: + assert ( + NAME in field + ), "A lookml view must have a name field" # name is required field of lookml field array + + child_field_map[field[NAME]] = field + + for field in parent_fields: + assert ( + NAME in field + ), "A lookml view must have a name field" # name is required field of lookml field array + + if field[NAME] in child_field_map: + # Fields defined in the child view take higher precedence. + # This is an override case where the child has redefined the parent field. + # There are some additive attributes; however, we are not consuming them in metadata ingestion + # and hence not adding them to the child field. + continue + + child_fields.append(field) + + return child_fields + + class LookerFieldContext: raw_field: Dict[Any, Any] @@ -248,23 +286,21 @@ def resolve_extends_view_name( ) return None - def get_including_extends( + def _get_parent_attribute( self, - field: str, + attribute_name: str, ) -> Optional[Any]: + """ + Search for the attribute_name in the parent views of the current view and return its value. + """ extends = list( itertools.chain.from_iterable( self.raw_view.get("extends", self.raw_view.get("extends__all", [])) ) ) - # First, check the current view. - if field in self.raw_view: - return self.raw_view[field] - - # The field might be defined in another view and this view is extending that view, - # so we resolve this field while taking that into account. - # following Looker's precedence rules. + # Following Looker's precedence rules. 
+ # reversed the view-names mentioned in `extends` attribute for extend in reversed(extends): assert extend != self.raw_view[NAME], "a view cannot extend itself" extend_view = self.resolve_extends_view_name( @@ -275,8 +311,33 @@ def get_including_extends( f"failed to resolve extends view {extend} in view {self.raw_view[NAME]} of" f" file {self.view_file.absolute_file_path}" ) - if field in extend_view: - return extend_view[field] + if attribute_name in extend_view: + return extend_view[attribute_name] + + return None + + def get_including_extends( + self, + field: str, + ) -> Optional[Any]: + + # According to Looker's inheritance rules, we need to merge the fields(i.e. dimensions, measures and + # dimension_groups) from both the child and parent. + if field in [DIMENSIONS, DIMENSION_GROUPS, MEASURES]: + # Get the child fields + child_fields = self._get_list_dict(field) + # merge parent and child fields + return merge_parent_and_child_fields( + child_fields=child_fields, + parent_fields=self._get_parent_attribute(attribute_name=field) or [], + ) + else: + # Return the field from the current view if it exists. 
+ if field in self.raw_view: + return self.raw_view[field] + + # The field might be defined in another view, and this view is extending that view, + return self._get_parent_attribute(field) return None @@ -313,9 +374,9 @@ def datahub_transformed_sql_table_name(self) -> str: # remove extra spaces and new lines from sql_table_name if it is not a sql if not self.is_direct_sql_query_case(): - table_name = remove_extra_spaces_and_newlines(table_name) # Some sql_table_name fields contain quotes like: optimizely."group", just remove the quotes table_name = table_name.replace('"', "").replace("`", "").lower() + table_name = remove_extra_spaces_and_newlines(table_name).strip() return table_name @@ -383,13 +444,13 @@ def _get_list_dict(self, attribute_name: str) -> List[Dict]: return [] def dimensions(self) -> List[Dict]: - return self._get_list_dict("dimensions") + return self.get_including_extends(field=DIMENSIONS) or [] def measures(self) -> List[Dict]: - return self._get_list_dict("measures") + return self.get_including_extends(field=MEASURES) or [] def dimension_groups(self) -> List[Dict]: - return self._get_list_dict("dimension_groups") + return self.get_including_extends(field=DIMENSION_GROUPS) or [] def is_materialized_derived_view(self) -> bool: for k in self.derived_table(): @@ -433,7 +494,7 @@ def is_sql_based_derived_case(self) -> bool: return False def is_native_derived_case(self) -> bool: - # It is pattern 5 + # It is pattern 5, mentioned in Class documentation if ( "derived_table" in self.raw_view and "explore_source" in self.raw_view["derived_table"] @@ -443,7 +504,7 @@ def is_native_derived_case(self) -> bool: return False def is_sql_based_derived_view_without_fields_case(self) -> bool: - # Pattern 6 + # Pattern 6, mentioned in Class documentation fields: List[Dict] = [] fields.extend(self.dimensions()) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py 
b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py index f4fb1316b16a20..0bcee14ec77a1a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py @@ -1,7 +1,7 @@ import logging from dataclasses import dataclass, field as dataclass_field from datetime import timedelta -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Literal, Optional, Union import pydantic from pydantic import root_validator, validator @@ -174,6 +174,13 @@ class LookMLSourceConfig( "view.sql_table_name. Defaults to an empty dictionary.", ) + looker_environment: Literal["prod", "dev"] = Field( + "prod", + description="A looker prod or dev environment. " + "It helps to evaluate looker if comments i.e. -- if prod --. " + "All if comments are evaluated to true for configured looker_environment value", + ) + @validator("connection_to_platform_map", pre=True) def convert_string_to_connection_def(cls, conn_map): # Previous version of config supported strings in connection map. This upconverts strings to ConnectionMap diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py index d77e65ac733232..b00291caabbf68 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py @@ -669,7 +669,7 @@ def get_internal_workunits(self) -> Iterable[MetadataWorkUnit]: # noqa: C901 self.source_config.project_name, self.base_projects_folder, self.reporter, - self.source_config.liquid_variable, + self.source_config, ) # Some views can be mentioned by multiple 'include' statements and can be included via different connections. 
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py b/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py index d5929b52aea3a3..0917a9e9faafee 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py @@ -385,7 +385,7 @@ def get_upstream_column_ref( config=self.config, ) - return upstreams_column_refs + return _drop_hive_dot_from_upstream(upstreams_column_refs) def get_upstream_dataset_urn(self) -> List[Urn]: return self._get_upstream_dataset_urn() diff --git a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py index 7ce3b5bc34da2f..e4dadaf602852c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py @@ -18,9 +18,13 @@ from datahub.emitter.mce_builder import ( make_data_platform_urn, make_dataplatform_instance_urn, - make_dataset_urn_with_platform_instance, ) from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.emitter.mcp_builder import ( + DatabaseKey, + add_dataset_to_container, + gen_containers, +) from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SourceCapability, @@ -32,6 +36,7 @@ ) from datahub.ingestion.api.source import MetadataWorkUnitProcessor from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.source.common.subtypes import DatasetContainerSubTypes from datahub.ingestion.source.schema_inference.object import ( SchemaDescription, construct_schema, @@ -64,6 +69,7 @@ DataPlatformInstanceClass, DatasetPropertiesClass, ) +from datahub.metadata.urns import DatasetUrn logger = logging.getLogger(__name__) @@ -263,6 +269,7 @@ class MongoDBSource(StatefulIngestionSourceBase): config: MongoDBConfig report: MongoDBSourceReport mongo_client: MongoClient + 
platform: str = "mongodb" def __init__(self, ctx: PipelineContext, config: MongoDBConfig): super().__init__(config, ctx) @@ -282,7 +289,9 @@ def __init__(self, ctx: PipelineContext, config: MongoDBConfig): } # See https://pymongo.readthedocs.io/en/stable/examples/datetimes.html#handling-out-of-range-datetimes - self.mongo_client = MongoClient(self.config.connect_uri, datetime_conversion="DATETIME_AUTO", **options) # type: ignore + self.mongo_client = MongoClient( + self.config.connect_uri, datetime_conversion="DATETIME_AUTO", **options + ) # type: ignore # This cheaply tests the connection. For details, see # https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient @@ -351,8 +360,6 @@ def get_field_type( return SchemaFieldDataType(type=TypeClass()) def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: - platform = "mongodb" - database_names: List[str] = self.mongo_client.list_database_names() # traverse databases in sorted order so output is consistent @@ -364,8 +371,19 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: continue database = self.mongo_client[database_name] - collection_names: List[str] = database.list_collection_names() + database_key = DatabaseKey( + database=database_name, + platform=self.platform, + instance=self.config.platform_instance, + env=self.config.env, + ) + yield from gen_containers( + container_key=database_key, + name=database_name, + sub_types=[DatasetContainerSubTypes.DATABASE], + ) + collection_names: List[str] = database.list_collection_names() # traverse collections in sorted order so output is consistent for collection_name in sorted(collection_names): dataset_name = f"{database_name}.{collection_name}" @@ -374,9 +392,9 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: self.report.report_dropped(dataset_name) continue - dataset_urn = make_dataset_urn_with_platform_instance( - platform=platform, - name=dataset_name, + dataset_urn = 
DatasetUrn.create_from_ids( + platform_id=self.platform, + table_name=dataset_name, env=self.config.env, platform_instance=self.config.platform_instance, ) @@ -385,9 +403,9 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: data_platform_instance = None if self.config.platform_instance: data_platform_instance = DataPlatformInstanceClass( - platform=make_data_platform_urn(platform), + platform=make_data_platform_urn(self.platform), instance=make_dataplatform_instance_urn( - platform, self.config.platform_instance + self.platform, self.config.platform_instance ), ) @@ -397,83 +415,21 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: ) schema_metadata: Optional[SchemaMetadata] = None - if self.config.enableSchemaInference: - assert self.config.maxDocumentSize is not None - collection_schema = construct_schema_pymongo( - database[collection_name], - delimiter=".", - use_random_sampling=self.config.useRandomSampling, - max_document_size=self.config.maxDocumentSize, - should_add_document_size_filter=self.should_add_document_size_filter(), - sample_size=self.config.schemaSamplingSize, - ) - - # initialize the schema for the collection - canonical_schema: List[SchemaField] = [] - max_schema_size = self.config.maxSchemaSize - collection_schema_size = len(collection_schema.values()) - collection_fields: Union[ - List[SchemaDescription], ValuesView[SchemaDescription] - ] = collection_schema.values() - assert max_schema_size is not None - if collection_schema_size > max_schema_size: - # downsample the schema, using frequency as the sort key - self.report.report_warning( - title="Too many schema fields", - message=f"Downsampling the collection schema because it has too many schema fields. 
Configured threshold is {max_schema_size}", - context=f"Schema Size: {collection_schema_size}, Collection: {dataset_urn}", - ) - # Add this information to the custom properties so user can know they are looking at downsampled schema - dataset_properties.customProperties[ - "schema.downsampled" - ] = "True" - dataset_properties.customProperties[ - "schema.totalFields" - ] = f"{collection_schema_size}" - - logger.debug( - f"Size of collection fields = {len(collection_fields)}" - ) - # append each schema field (sort so output is consistent) - for schema_field in sorted( - collection_fields, - key=lambda x: ( - -x["count"], - x["delimited_name"], - ), # Negate `count` for descending order, `delimited_name` stays the same for ascending - )[0:max_schema_size]: - field = SchemaField( - fieldPath=schema_field["delimited_name"], - nativeDataType=self.get_pymongo_type_string( - schema_field["type"], dataset_name - ), - type=self.get_field_type( - schema_field["type"], dataset_name - ), - description=None, - nullable=schema_field["nullable"], - recursive=False, - ) - canonical_schema.append(field) - - # create schema metadata object for collection - schema_metadata = SchemaMetadata( - schemaName=collection_name, - platform=f"urn:li:dataPlatform:{platform}", - version=0, - hash="", - platformSchema=SchemalessClass(), - fields=canonical_schema, + schema_metadata = self._infer_schema_metadata( + collection=database[collection_name], + dataset_urn=dataset_urn, + dataset_properties=dataset_properties, ) # TODO: use list_indexes() or index_information() to get index information # See https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.list_indexes. 
+ yield from add_dataset_to_container(database_key, dataset_urn.urn()) yield from [ mcp.as_workunit() for mcp in MetadataChangeProposalWrapper.construct_many( - entityUrn=dataset_urn, + entityUrn=dataset_urn.urn(), aspects=[ schema_metadata, dataset_properties, @@ -482,6 +438,74 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: ) ] + def _infer_schema_metadata( + self, + collection: pymongo.collection.Collection, + dataset_urn: DatasetUrn, + dataset_properties: DatasetPropertiesClass, + ) -> SchemaMetadata: + assert self.config.maxDocumentSize is not None + collection_schema = construct_schema_pymongo( + collection, + delimiter=".", + use_random_sampling=self.config.useRandomSampling, + max_document_size=self.config.maxDocumentSize, + should_add_document_size_filter=self.should_add_document_size_filter(), + sample_size=self.config.schemaSamplingSize, + ) + + # initialize the schema for the collection + canonical_schema: List[SchemaField] = [] + max_schema_size = self.config.maxSchemaSize + collection_schema_size = len(collection_schema.values()) + collection_fields: Union[ + List[SchemaDescription], ValuesView[SchemaDescription] + ] = collection_schema.values() + assert max_schema_size is not None + if collection_schema_size > max_schema_size: + # downsample the schema, using frequency as the sort key + self.report.report_warning( + title="Too many schema fields", + message=f"Downsampling the collection schema because it has too many schema fields. 
Configured threshold is {max_schema_size}", + context=f"Schema Size: {collection_schema_size}, Collection: {dataset_urn}", + ) + # Add this information to the custom properties so user can know they are looking at downsampled schema + dataset_properties.customProperties["schema.downsampled"] = "True" + dataset_properties.customProperties[ + "schema.totalFields" + ] = f"{collection_schema_size}" + + logger.debug(f"Size of collection fields = {len(collection_fields)}") + # append each schema field (sort so output is consistent) + for schema_field in sorted( + collection_fields, + key=lambda x: ( + -x["count"], + x["delimited_name"], + ), # Negate `count` for descending order, `delimited_name` stays the same for ascending + )[0:max_schema_size]: + field = SchemaField( + fieldPath=schema_field["delimited_name"], + nativeDataType=self.get_pymongo_type_string( + schema_field["type"], dataset_urn.name + ), + type=self.get_field_type(schema_field["type"], dataset_urn.name), + description=None, + nullable=schema_field["nullable"], + recursive=False, + ) + canonical_schema.append(field) + + # create schema metadata object for collection + return SchemaMetadata( + schemaName=collection.name, + platform=f"urn:li:dataPlatform:{self.platform}", + version=0, + hash="", + platformSchema=SchemalessClass(), + fields=canonical_schema, + ) + def is_server_version_gte_4_4(self) -> bool: try: server_version = self.mongo_client.server_info().get("versionArray") diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift_schema.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift_schema.py index 2e628269edbc37..594f88dd521ad5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift_schema.py @@ -313,23 +313,19 @@ def get_table_stats(enriched_tables, field_names, schema, table): size_in_bytes: Optional[int] = None rows_count: Optional[int] = None if 
schema in enriched_tables and table_name in enriched_tables[schema]: - if enriched_tables[schema][table_name].last_accessed is not None: - # Mypy seems to be not clever enough to understand the above check - last_accessed = enriched_tables[schema][table_name].last_accessed - assert last_accessed + if ( + last_accessed := enriched_tables[schema][table_name].last_accessed + ) is not None: last_altered = last_accessed.replace(tzinfo=timezone.utc) elif creation_time: last_altered = creation_time - if enriched_tables[schema][table_name].size is not None: - # Mypy seems to be not clever enough to understand the above check - size = enriched_tables[schema][table_name].size - if size: - size_in_bytes = size * 1024 * 1024 + if (size := enriched_tables[schema][table_name].size) is not None: + size_in_bytes = size * 1024 * 1024 - if enriched_tables[schema][table_name].estimated_visible_rows is not None: - rows = enriched_tables[schema][table_name].estimated_visible_rows - assert rows + if ( + rows := enriched_tables[schema][table_name].estimated_visible_rows + ) is not None: rows_count = int(rows) else: # The object was not found in the enriched data. diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/config.py b/metadata-ingestion/src/datahub/ingestion/source/s3/config.py index 921ab275642505..8f1b79251c466f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/config.py @@ -98,6 +98,11 @@ class DataLakeSourceConfig( description="Whether to sort schema fields by fieldPath when inferring schemas.", ) + generate_partition_aspects: bool = Field( + default=True, + description="Whether to generate partition aspects for partitioned tables. On older servers for backward compatibility, this should be set to False. 
This flag will be removed in future versions.", + ) + def is_profiling_enabled(self) -> bool: return self.profiling.enabled and is_profiling_enabled( self.profiling.operation_config diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py index f81d06c35e3b09..55e25ebe88d125 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py @@ -5,10 +5,10 @@ import pathlib import re import time -from collections import OrderedDict from datetime import datetime +from itertools import groupby from pathlib import PurePath -from typing import Dict, Iterable, List, Optional, Tuple +from typing import Any, Dict, Iterable, List, Optional, Tuple import smart_open.compression as so_compression from more_itertools import peekable @@ -43,6 +43,7 @@ strip_s3_prefix, ) from datahub.ingestion.source.data_lake_common.data_lake_utils import ContainerWUCreator +from datahub.ingestion.source.data_lake_common.path_spec import FolderTraversalMethod from datahub.ingestion.source.s3.config import DataLakeSourceConfig, PathSpec from datahub.ingestion.source.s3.report import DataLakeSourceReport from datahub.ingestion.source.schema_inference import avro, csv_tsv, json, parquet @@ -52,6 +53,7 @@ from datahub.ingestion.source.state.stateful_ingestion_base import ( StatefulIngestionSourceBase, ) +from datahub.metadata.com.linkedin.pegasus2avro.common import TimeStamp from datahub.metadata.com.linkedin.pegasus2avro.schema import ( SchemaField, SchemaMetadata, @@ -63,6 +65,8 @@ OperationClass, OperationTypeClass, OtherSchemaClass, + PartitionsSummaryClass, + PartitionSummaryClass, SchemaFieldDataTypeClass, _Aspect, ) @@ -134,16 +138,43 @@ def partitioned_folder_comparator(folder1: str, folder2: str) -> int: return 1 if folder1 > folder2 else -1 +@dataclasses.dataclass +class Folder: + creation_time: datetime + modification_time: datetime + 
size: int + sample_file: str + partition_id: Optional[List[Tuple[str, str]]] = None + is_partition: bool = False + + def partition_id_text(self) -> Optional[str]: + return ( + "/".join([f"{k}={v}" for k, v in self.partition_id]) + if self.partition_id + else None + ) + + +@dataclasses.dataclass +class BrowsePath: + file: str + timestamp: datetime + size: int + partitions: List[Folder] + + @dataclasses.dataclass class TableData: display_name: str is_s3: bool full_path: str - partitions: Optional[OrderedDict] timestamp: datetime table_path: str size_in_bytes: int number_of_files: int + partitions: Optional[List[Folder]] = None + max_partition: Optional[Folder] = None + min_partition: Optional[Folder] = None @platform_name("S3 / Local Files", id="s3") @@ -395,7 +426,7 @@ def get_fields(self, table_data: TableData, path_spec: PathSpec) -> List: if self.source_config.sort_schema_fields: fields = sorted(fields, key=lambda f: f.fieldPath) - if self.source_config.add_partition_columns_to_schema: + if self.source_config.add_partition_columns_to_schema and table_data.partitions: self.add_partition_columns_to_schema( fields=fields, path_spec=path_spec, full_path=table_data.full_path ) @@ -410,21 +441,23 @@ def add_partition_columns_to_schema( if field.fieldPath.startswith("[version=2.0]"): is_fieldpath_v2 = True break - vars = path_spec.get_named_vars(full_path) - if vars is not None and "partition_key" in vars: - for partition_key in vars["partition_key"].values(): - fields.append( - SchemaField( - fieldPath=f"{partition_key}" - if not is_fieldpath_v2 - else f"[version=2.0].[type=string].{partition_key}", - nativeDataType="string", - type=SchemaFieldDataTypeClass(StringTypeClass()), - isPartitioningKey=True, - nullable=True, - recursive=False, - ) + partition_keys = path_spec.get_partition_from_path(full_path) + if not partition_keys: + return None + + for partition_key in partition_keys: + fields.append( + SchemaField( + fieldPath=f"{partition_key[0]}" + if not 
is_fieldpath_v2 + else f"[version=2.0].[type=string].{partition_key[0]}", + nativeDataType="string", + type=SchemaFieldDataTypeClass(StringTypeClass()), + isPartitioningKey=True, + nullable=True, + recursive=False, ) + ) def get_table_profile( self, table_data: TableData, dataset_urn: str @@ -517,6 +550,39 @@ def _create_table_operation_aspect(self, table_data: TableData) -> OperationClas return operation + def __create_partition_summary_aspect( + self, partitions: List[Folder] + ) -> Optional[PartitionsSummaryClass]: + min_partition = min(partitions, key=lambda x: x.creation_time) + max_partition = max(partitions, key=lambda x: x.creation_time) + + max_partition_summary: Optional[PartitionSummaryClass] = None + + max_partition_id = max_partition.partition_id_text() + if max_partition_id is not None: + max_partition_summary = PartitionSummaryClass( + partition=max_partition_id, + createdTime=int(max_partition.creation_time.timestamp() * 1000), + lastModifiedTime=int( + max_partition.modification_time.timestamp() * 1000 + ), + ) + + min_partition_summary: Optional[PartitionSummaryClass] = None + min_partition_id = min_partition.partition_id_text() + if min_partition_id is not None: + min_partition_summary = PartitionSummaryClass( + partition=min_partition_id, + createdTime=int(min_partition.creation_time.timestamp() * 1000), + lastModifiedTime=int( + min_partition.modification_time.timestamp() * 1000 + ), + ) + + return PartitionsSummaryClass( + maxPartition=max_partition_summary, minPartition=min_partition_summary + ) + def ingest_table( self, table_data: TableData, path_spec: PathSpec ) -> Iterable[MetadataWorkUnit]: @@ -549,6 +615,12 @@ def ingest_table( customProperties = {"schema_inferred_from": str(table_data.full_path)} + min_partition: Optional[Folder] = None + max_partition: Optional[Folder] = None + if table_data.partitions: + min_partition = min(table_data.partitions, key=lambda x: x.creation_time) + max_partition = max(table_data.partitions, key=lambda 
x: x.creation_time) + if not path_spec.sample_files: customProperties.update( { @@ -556,11 +628,30 @@ def ingest_table( "size_in_bytes": str(table_data.size_in_bytes), } ) + else: + if table_data.partitions: + customProperties.update( + { + "number_of_partitions": str( + len(table_data.partitions) if table_data.partitions else 0 + ), + } + ) dataset_properties = DatasetPropertiesClass( description="", name=table_data.display_name, customProperties=customProperties, + created=( + TimeStamp(time=int(min_partition.creation_time.timestamp() * 1000)) + if min_partition + else None + ), + lastModified=( + TimeStamp(time=int(max_partition.modification_time.timestamp() * 1000)) + if max_partition + else None + ), ) aspects.append(dataset_properties) if table_data.size_in_bytes > 0: @@ -609,6 +700,12 @@ def ingest_table( operation = self._create_table_operation_aspect(table_data) aspects.append(operation) + + if table_data.partitions and self.source_config.generate_partition_aspects: + aspects.append( + self.__create_partition_summary_aspect(table_data.partitions) + ) + for mcp in MetadataChangeProposalWrapper.construct_many( entityUrn=dataset_urn, aspects=aspects, @@ -635,20 +732,35 @@ def extract_table_name(self, path_spec: PathSpec, named_vars: dict) -> str: return path_spec.table_name.format_map(named_vars) def extract_table_data( - self, path_spec: PathSpec, path: str, timestamp: datetime, size: int + self, + path_spec: PathSpec, + path: str, + timestamp: datetime, + size: int, + partitions: List[Folder], ) -> TableData: logger.debug(f"Getting table data for path: {path}") table_name, table_path = path_spec.extract_table_name_and_path(path) - table_data = None table_data = TableData( display_name=table_name, is_s3=self.is_s3_platform(), full_path=path, - partitions=None, + partitions=partitions, + max_partition=partitions[-1] if partitions else None, + min_partition=partitions[0] if partitions else None, timestamp=timestamp, table_path=table_path, number_of_files=1, - 
size_in_bytes=size, + size_in_bytes=( + size + if size + else sum( + [ + partition.size if partition.size else 0 + for partition in partitions + ] + ) + ), ) return table_data @@ -668,8 +780,17 @@ def resolve_templated_folders(self, bucket_name: str, prefix: str) -> Iterable[s ) def get_dir_to_process( - self, bucket_name: str, folder: str, path_spec: PathSpec, protocol: str - ) -> str: + self, + bucket_name: str, + folder: str, + path_spec: PathSpec, + protocol: str, + min: bool = False, + ) -> List[str]: + + # if len(path_spec.include.split("/")) == len(f"{protocol}{bucket_name}/{folder}".split("/")): + # return [f"{protocol}{bucket_name}/{folder}"] + iterator = list_folders( bucket_name=bucket_name, prefix=folder, @@ -680,23 +801,102 @@ def get_dir_to_process( sorted_dirs = sorted( iterator, key=functools.cmp_to_key(partitioned_folder_comparator), - reverse=True, + reverse=not min, ) + folders = [] for dir in sorted_dirs: if path_spec.dir_allowed(f"{protocol}{bucket_name}/{dir}/"): - return self.get_dir_to_process( + folders_list = self.get_dir_to_process( bucket_name=bucket_name, folder=dir + "/", path_spec=path_spec, protocol=protocol, + min=min, ) - return folder - else: - return folder + folders.extend(folders_list) + if not path_spec.traversal_method == FolderTraversalMethod.ALL: + return folders + if folders: + return folders + else: + return [f"{protocol}{bucket_name}/{folder}"] + return [f"{protocol}{bucket_name}/{folder}"] + + def get_folder_info( + self, + path_spec: PathSpec, + bucket: Any, # Todo: proper type + prefix: str, + ) -> List[Folder]: + """ + Retrieves all the folders in a path by listing all the files in the prefix. + If the prefix is a full path then only that folder will be extracted. + + A folder has creation and modification times, size, and a sample file path. + - Creation time is the earliest creation time of all files in the folder. + - Modification time is the latest modification time of all files in the folder. 
+ - Size is the sum of all file sizes in the folder. + - Sample file path is used for schema inference later. (sample file is the latest created file in the folder) + + Parameters: + path_spec (PathSpec): The path specification used to determine partitioning. + bucket (Any): The S3 bucket object. + prefix (str): The prefix path in the S3 bucket to list objects from. + + Returns: + List[Folder]: A list of Folder objects representing the partitions found. + """ + + prefix_to_list = prefix + files = list( + bucket.objects.filter(Prefix=f"{prefix_to_list}").page_size(PAGE_SIZE) + ) + files = sorted(files, key=lambda a: a.last_modified) + grouped_files = groupby(files, lambda x: x.key.rsplit("/", 1)[0]) + + partitions: List[Folder] = [] + for key, group in grouped_files: + file_size = 0 + creation_time = None + modification_time = None + + for item in group: + file_path = self.create_s3_path(item.bucket_name, item.key) + if not path_spec.allowed(file_path): + logger.debug(f"File {file_path} not allowed and skipping") + continue + file_size += item.size + if creation_time is None or item.last_modified < creation_time: + creation_time = item.last_modified + if modification_time is None or item.last_modified > modification_time: + modification_time = item.last_modified + max_file = item + + if modification_time is None: + logger.warning( + f"Unable to find any files in the folder {key}. Skipping..." 
+ ) + continue + + id = path_spec.get_partition_from_path( + self.create_s3_path(max_file.bucket_name, max_file.key) + ) + + # If id is None, it means the folder is not a partition + partitions.append( + Folder( + partition_id=id, + is_partition=bool(id), + creation_time=creation_time if creation_time else None, + modification_time=modification_time, + sample_file=self.create_s3_path(max_file.bucket_name, max_file.key), + size=file_size, + ) + ) + + return partitions - def s3_browser( - self, path_spec: PathSpec, sample_size: int - ) -> Iterable[Tuple[str, datetime, int]]: + def s3_browser(self, path_spec: PathSpec, sample_size: int) -> Iterable[BrowsePath]: if self.source_config.aws_config is None: raise ValueError("aws_config not set. Cannot browse s3") s3 = self.source_config.aws_config.get_s3_resource( @@ -728,6 +928,9 @@ def s3_browser( include = include.replace(max_match, "*") max_start = match.start() max_match = match.group() + # We stop at {table} + if max_match == "{table}": + break table_index = include.find(max_match) for folder in self.resolve_templated_folders( @@ -737,24 +940,68 @@ def s3_browser( for f in list_folders( bucket_name, f"{folder}", self.source_config.aws_config ): + dirs_to_process = [] logger.info(f"Processing folder: {f}") - protocol = ContainerWUCreator.get_protocol(path_spec.include) - dir_to_process = self.get_dir_to_process( - bucket_name=bucket_name, - folder=f + "/", - path_spec=path_spec, - protocol=protocol, + if path_spec.traversal_method == FolderTraversalMethod.ALL: + dirs_to_process.append(f) + else: + if ( + path_spec.traversal_method + == FolderTraversalMethod.MIN_MAX + or path_spec.traversal_method + == FolderTraversalMethod.MAX + ): + protocol = ContainerWUCreator.get_protocol( + path_spec.include + ) + dirs_to_process_max = self.get_dir_to_process( + bucket_name=bucket_name, + folder=f + "/", + path_spec=path_spec, + protocol=protocol, + ) + dirs_to_process.append(dirs_to_process_max[0]) + + if ( + 
path_spec.traversal_method + == FolderTraversalMethod.MIN_MAX + ): + dirs_to_process_min = self.get_dir_to_process( + bucket_name=bucket_name, + folder=f + "/", + path_spec=path_spec, + protocol=protocol, + min=True, + ) + dirs_to_process.append(dirs_to_process_min[0]) + folders = [] + for dir in dirs_to_process: + logger.info(f"Getting files from folder: {dir}") + prefix_to_process = dir.rstrip("\\").lstrip( + self.create_s3_path(bucket_name, "/") + ) + + folders.extend( + self.get_folder_info( + path_spec, bucket, prefix_to_process + ) + ) + max_folder = None + if folders: + max_folder = max(folders, key=lambda x: x.modification_time) + if not max_folder: + logger.warning( + f"Unable to find any files in the folder {dir}. Skipping..." + ) + continue + + partitions = list(filter(lambda x: x.is_partition, folders)) + yield BrowsePath( + file=max_folder.sample_file, + timestamp=max_folder.modification_time, + size=max_folder.size, + partitions=partitions, ) - logger.info(f"Getting files from folder: {dir_to_process}") - dir_to_process = dir_to_process.rstrip("\\") - for obj in ( - bucket.objects.filter(Prefix=f"{dir_to_process}") - .page_size(PAGE_SIZE) - .limit(sample_size) - ): - s3_path = self.create_s3_path(obj.bucket_name, obj.key) - logger.debug(f"Sampling file: {s3_path}") - yield s3_path, obj.last_modified, obj.size, except Exception as e: # This odd check if being done because boto does not have a proper exception to catch # The exception that appears in stacktrace cannot actually be caught without a lot more work @@ -774,18 +1021,26 @@ def s3_browser( for obj in bucket.objects.filter(Prefix=prefix).page_size(PAGE_SIZE): s3_path = self.create_s3_path(obj.bucket_name, obj.key) logger.debug(f"Path: {s3_path}") - yield s3_path, obj.last_modified, obj.size, + yield BrowsePath( + file=s3_path, + timestamp=obj.last_modified, + size=obj.size, + partitions=[], + ) def create_s3_path(self, bucket_name: str, key: str) -> str: return f"s3://{bucket_name}/{key}" - def 
local_browser(self, path_spec: PathSpec) -> Iterable[Tuple[str, datetime, int]]: + def local_browser(self, path_spec: PathSpec) -> Iterable[BrowsePath]: prefix = self.get_prefix(path_spec.include) if os.path.isfile(prefix): logger.debug(f"Scanning single local file: {prefix}") - yield prefix, datetime.utcfromtimestamp( - os.path.getmtime(prefix) - ), os.path.getsize(prefix) + yield BrowsePath( + file=prefix, + timestamp=datetime.utcfromtimestamp(os.path.getmtime(prefix)), + size=os.path.getsize(prefix), + partitions=[], + ) else: logger.debug(f"Scanning files under local folder: {prefix}") for root, dirs, files in os.walk(prefix): @@ -796,9 +1051,14 @@ def local_browser(self, path_spec: PathSpec) -> Iterable[Tuple[str, datetime, in full_path = PurePath( os.path.normpath(os.path.join(root, file)) ).as_posix() - yield full_path, datetime.utcfromtimestamp( - os.path.getmtime(full_path) - ), os.path.getsize(full_path) + yield BrowsePath( + file=full_path, + timestamp=datetime.utcfromtimestamp( + os.path.getmtime(full_path) + ), + size=os.path.getsize(full_path), + partitions=[], + ) def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: self.container_WU_creator = ContainerWUCreator( @@ -817,11 +1077,15 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: else self.local_browser(path_spec) ) table_dict: Dict[str, TableData] = {} - for file, timestamp, size in file_browser: - if not path_spec.allowed(file): + for browse_path in file_browser: + if not path_spec.allowed(browse_path.file): continue table_data = self.extract_table_data( - path_spec, file, timestamp, size + path_spec, + browse_path.file, + browse_path.timestamp, + browse_path.size, + browse_path.partitions, ) if table_data.table_path not in table_dict: table_dict[table_data.table_path] = table_data diff --git a/metadata-ingestion/src/datahub/ingestion/source/salesforce.py b/metadata-ingestion/src/datahub/ingestion/source/salesforce.py index 42128123c61442..7a7f1f30950eb6 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/salesforce.py +++ b/metadata-ingestion/src/datahub/ingestion/source/salesforce.py @@ -3,7 +3,7 @@ import time from datetime import datetime from enum import Enum -from typing import Dict, Iterable, List, Optional +from typing import Any, Dict, Iterable, List, Optional import requests from pydantic import Field, validator @@ -124,6 +124,9 @@ class SalesforceConfig(DatasetSourceConfigMixin): default=dict(), description='Regex patterns for tables/schemas to describe domain_key domain key (domain_key can be any string like "sales".) There can be multiple domain keys specified.', ) + api_version: Optional[str] = Field( + description="If specified, overrides default version used by the Salesforce package. Example value: '59.0'" + ) profiling: SalesforceProfilingConfig = SalesforceProfilingConfig() @@ -222,6 +225,12 @@ def __init__(self, config: SalesforceConfig, ctx: PipelineContext) -> None: self.session = requests.Session() self.platform: str = "salesforce" self.fieldCounts = {} + common_args: Dict[str, Any] = { + "domain": "test" if self.config.is_sandbox else None, + "session": self.session, + } + if self.config.api_version: + common_args["version"] = self.config.api_version try: if self.config.auth is SalesforceAuthType.DIRECT_ACCESS_TOKEN: @@ -236,8 +245,7 @@ def __init__(self, config: SalesforceConfig, ctx: PipelineContext) -> None: self.sf = Salesforce( instance_url=self.config.instance_url, session_id=self.config.access_token, - session=self.session, - domain="test" if self.config.is_sandbox else None, + **common_args, ) elif self.config.auth is SalesforceAuthType.USERNAME_PASSWORD: logger.debug("Username/Password Provided in Config") @@ -255,8 +263,7 @@ def __init__(self, config: SalesforceConfig, ctx: PipelineContext) -> None: username=self.config.username, password=self.config.password, security_token=self.config.security_token, - session=self.session, - domain="test" if self.config.is_sandbox else None, + 
**common_args, ) elif self.config.auth is SalesforceAuthType.JSON_WEB_TOKEN: @@ -275,14 +282,13 @@ def __init__(self, config: SalesforceConfig, ctx: PipelineContext) -> None: username=self.config.username, consumer_key=self.config.consumer_key, privatekey=self.config.private_key, - session=self.session, - domain="test" if self.config.is_sandbox else None, + **common_args, ) except Exception as e: logger.error(e) raise ConfigurationError("Salesforce login failed") from e - else: + if not self.config.api_version: # List all REST API versions and use latest one versions_url = "https://{instance}/services/data/".format( instance=self.sf.sf_instance, @@ -290,17 +296,22 @@ def __init__(self, config: SalesforceConfig, ctx: PipelineContext) -> None: versions_response = self.sf._call_salesforce("GET", versions_url).json() latest_version = versions_response[-1] version = latest_version["version"] + # we could avoid setting the version like below (after the Salesforce object has been already initiated + # above), since, according to the docs: + # https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/dome_versions.htm + # we don't need to be authenticated to list the versions (so we could perform this call before even + # authenticating) self.sf.sf_version = version - self.base_url = "https://{instance}/services/data/v{sf_version}/".format( - instance=self.sf.sf_instance, sf_version=version - ) + self.base_url = "https://{instance}/services/data/v{sf_version}/".format( + instance=self.sf.sf_instance, sf_version=self.sf.sf_version + ) - logger.debug( - "Using Salesforce REST API with {label} version: {version}".format( - label=latest_version["label"], version=latest_version["version"] - ) + logger.debug( + "Using Salesforce REST API version: {version}".format( + version=self.sf.sf_version ) + ) def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: sObjects = self.get_salesforce_objects() diff --git 
a/metadata-ingestion/src/datahub/ingestion/source/snowflake/constants.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/constants.py index 6f5e22e39d0c84..3ce684b29cf396 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/constants.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/constants.py @@ -52,6 +52,7 @@ class SnowflakeObjectDomain(str, Enum): DATABASE = "database" SCHEMA = "schema" COLUMN = "column" + ICEBERG_TABLE = "iceberg table" GENERIC_PERMISSION_ERROR_KEY = "permission-error" diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py index ac9164cd0a0008..9e74fb8b496aa2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py @@ -29,7 +29,7 @@ StatefulProfilingConfigMixin, StatefulUsageConfigMixin, ) -from datahub.ingestion.source_config.usage.snowflake_usage import SnowflakeUsageConfig +from datahub.ingestion.source.usage.usage_common import BaseUsageConfig from datahub.utilities.global_warning_util import add_global_warning logger = logging.Logger(__name__) @@ -135,6 +135,17 @@ class SnowflakeIdentifierConfig( ) +class SnowflakeUsageConfig(BaseUsageConfig): + email_domain: Optional[str] = pydantic.Field( + default=None, + description="Email domain of your organization so users can be displayed on UI appropriately.", + ) + apply_view_usage_to_tables: bool = pydantic.Field( + default=False, + description="Whether to apply view's usage to its base tables. If set to True, usage is applied to base tables only.", + ) + + # TODO: SnowflakeConfig is unused except for this inheritance. We should collapse the config inheritance hierarchy. 
class SnowflakeConfig( SnowflakeIdentifierConfig, diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py index 151e9fb6316207..6f9c9259b27844 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py @@ -353,7 +353,7 @@ def _populate_external_lineage_from_copy_history( def _process_external_lineage_result_row( cls, db_row: dict, - discovered_tables: Optional[List[str]], + discovered_tables: Optional[Collection[str]], identifiers: SnowflakeIdentifierBuilder, ) -> Optional[KnownLineageMapping]: # key is the down-stream table name diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_queries.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_queries.py index d5b8f98e400756..f03d687ac32f66 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_queries.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_queries.py @@ -131,7 +131,7 @@ def __init__( self.report = SnowflakeQueriesExtractorReport() self.filters = filters self.identifiers = identifiers - self.discovered_tables = discovered_tables + self.discovered_tables = set(discovered_tables) if discovered_tables else None self._structured_report = structured_report @@ -175,10 +175,24 @@ def local_temp_path(self) -> pathlib.Path: return path def is_temp_table(self, name: str) -> bool: - return any( + if any( re.match(pattern, name, flags=re.IGNORECASE) for pattern in self.config.temporary_tables_pattern - ) + ): + return True + + # This is also a temp table if + # 1. this name would be allowed by the dataset patterns, and + # 2. we have a list of discovered tables, and + # 3. 
it's not in the discovered tables list + if ( + self.filters.is_dataset_pattern_allowed(name, SnowflakeObjectDomain.TABLE) + and self.discovered_tables + and name not in self.discovered_tables + ): + return True + + return False def is_allowed_table(self, name: str) -> bool: if self.discovered_tables and name not in self.discovered_tables: @@ -219,7 +233,9 @@ def get_workunits_internal( queries.append(entry) with self.report.audit_log_load_timer: - for query in queries: + for i, query in enumerate(queries): + if i % 1000 == 0: + logger.info(f"Added {i} query log entries to SQL aggregator") self.aggregator.add(query) yield from auto_workunit(self.aggregator.gen_metadata()) @@ -275,8 +291,8 @@ def fetch_query_log( resp = self.connection.query(query_log_query) for i, row in enumerate(resp): - if i % 1000 == 0: - logger.info(f"Processed {i} query log rows") + if i > 0 and i % 1000 == 0: + logger.info(f"Processed {i} query log rows so far") assert isinstance(row, dict) try: diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py index a2e18a64d9a809..662e1cc2509eae 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py @@ -35,6 +35,7 @@ class SnowflakeQuery: SnowflakeObjectDomain.EXTERNAL_TABLE.capitalize(), SnowflakeObjectDomain.VIEW.capitalize(), SnowflakeObjectDomain.MATERIALIZED_VIEW.capitalize(), + SnowflakeObjectDomain.ICEBERG_TABLE.capitalize(), } ACCESS_HISTORY_TABLE_VIEW_DOMAINS_FILTER = "({})".format( diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index 1881e1da5be686..0c861b1334d9fa 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ 
b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -526,6 +526,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: filters=self.filters, identifiers=self.identifiers, schema_resolver=schema_resolver, + discovered_tables=discovered_datasets, ) # TODO: This is slightly suboptimal because we create two SqlParsingAggregator instances with different configs diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py index 93f2a0ef2f6a86..f3a9c4a5aa201e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py @@ -502,7 +502,12 @@ def get_tags_from_params(params: List[str] = []) -> GlobalTagsClass: def tableau_field_to_schema_field(field, ingest_tags): - nativeDataType = field.get("dataType", "UNKNOWN") + # The check here makes sure that even if 'dataType' key exists in the 'field' dictionary but has value None, + # it will be set as "UNKNOWN" (nativeDataType field can not be None in the SchemaField). 
+ # Hence, field.get("dataType", "UNKNOWN") is not enough + nativeDataType = field.get("dataType") + if nativeDataType is None: + nativeDataType = "UNKNOWN" TypeClass = FIELD_TYPE_MAPPING.get(nativeDataType, NullTypeClass) schema_field = SchemaField( diff --git a/metadata-ingestion/src/datahub/ingestion/source/usage/usage_common.py b/metadata-ingestion/src/datahub/ingestion/source/usage/usage_common.py index a2dbef538f73cb..2b7aae8330905e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/usage/usage_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/usage/usage_common.py @@ -83,9 +83,11 @@ def make_usage_workunit( budget_per_query: int = int(queries_character_limit / top_n_queries) top_sql_queries = [ trim_query( - format_sql_query(query, keyword_case="upper", reindent_aligned=True) - if format_sql_queries - else query, + ( + format_sql_query(query, keyword_case="upper", reindent_aligned=True) + if format_sql_queries + else query + ), budget_per_query=budget_per_query, query_trimmer_string=query_trimmer_string, ) @@ -149,7 +151,7 @@ def add_read_entry( self.userFreq[user_email] += count if query: - self.queryCount += 1 + self.queryCount += count self.queryFreq[query] += count for column in fields: self.columnFreq[column] += count @@ -295,21 +297,25 @@ def convert_usage_aggregation_class( uniqueUserCount=obj.metrics.uniqueUserCount, totalSqlQueries=obj.metrics.totalSqlQueries, topSqlQueries=obj.metrics.topSqlQueries, - userCounts=[ - DatasetUserUsageCountsClass( - user=u.user, count=u.count, userEmail=u.userEmail - ) - for u in obj.metrics.users - if u.user is not None - ] - if obj.metrics.users - else None, - fieldCounts=[ - DatasetFieldUsageCountsClass(fieldPath=f.fieldName, count=f.count) - for f in obj.metrics.fields - ] - if obj.metrics.fields - else None, + userCounts=( + [ + DatasetUserUsageCountsClass( + user=u.user, count=u.count, userEmail=u.userEmail + ) + for u in obj.metrics.users + if u.user is not None + ] + if 
obj.metrics.users + else None + ), + fieldCounts=( + [ + DatasetFieldUsageCountsClass(fieldPath=f.fieldName, count=f.count) + for f in obj.metrics.fields + ] + if obj.metrics.fields + else None + ), ) return MetadataChangeProposalWrapper(entityUrn=obj.resource, aspect=aspect) else: diff --git a/metadata-ingestion/src/datahub/ingestion/source_config/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source_config/bigquery.py deleted file mode 100644 index 1b96da4fecbc9c..00000000000000 --- a/metadata-ingestion/src/datahub/ingestion/source_config/bigquery.py +++ /dev/null @@ -1,44 +0,0 @@ -import re - -import pydantic - -from datahub.configuration.common import ConfigModel - -# Regexp for sharded tables. -# A sharded table is a table that has a suffix of the form _yyyymmdd or yyyymmdd, where yyyymmdd is a date. -# The regexp checks for valid dates in the suffix (e.g. 20200101, 20200229, 20201231) and if the date is not valid -# then it is not a sharded table. -_BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX: str = ( - "((.+\\D)[_$]?)?(\\d\\d\\d\\d(?:0[1-9]|1[0-2])(?:0[1-9]|[12][0-9]|3[01]))$" -) - - -class BigQueryBaseConfig(ConfigModel): - rate_limit: bool = pydantic.Field( - default=False, description="Should we rate limit requests made to API." - ) - requests_per_min: int = pydantic.Field( - default=60, - description="Used to control number of API calls made per min. Only used when `rate_limit` is set to `True`.", - ) - - temp_table_dataset_prefix: str = pydantic.Field( - default="_", - description="If you are creating temp tables in a dataset with a particular prefix you can use this config to set the prefix for the dataset. This is to support workflows from before bigquery's introduction of temp tables. 
By default we use `_` because of datasets that begin with an underscore are hidden by default https://cloud.google.com/bigquery/docs/datasets#dataset-naming.", - ) - - sharded_table_pattern: str = pydantic.Field( - deprecated=True, - default=_BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX, - description="The regex pattern to match sharded tables and group as one table. This is a very low level config parameter, only change if you know what you are doing, ", - ) - - @pydantic.validator("sharded_table_pattern") - def sharded_table_pattern_is_a_valid_regexp(cls, v): - try: - re.compile(v) - except Exception as e: - raise ValueError( - f"sharded_table_pattern configuration pattern is invalid. The exception was: {e}" - ) - return v diff --git a/metadata-ingestion/src/datahub/ingestion/source_config/usage/bigquery_usage.py b/metadata-ingestion/src/datahub/ingestion/source_config/usage/bigquery_usage.py deleted file mode 100644 index cc87c791120a87..00000000000000 --- a/metadata-ingestion/src/datahub/ingestion/source_config/usage/bigquery_usage.py +++ /dev/null @@ -1,163 +0,0 @@ -import json -import logging -import os -import tempfile -from datetime import timedelta -from typing import Any, Dict, List, Optional - -import pydantic - -from datahub.configuration import ConfigModel -from datahub.configuration.common import AllowDenyPattern -from datahub.configuration.source_common import EnvConfigMixin -from datahub.configuration.validate_field_removal import pydantic_removed_field -from datahub.configuration.validate_multiline_string import pydantic_multiline_string -from datahub.ingestion.source.usage.usage_common import BaseUsageConfig -from datahub.ingestion.source_config.bigquery import BigQueryBaseConfig - -logger = logging.getLogger(__name__) - - -class BigQueryCredential(ConfigModel): - project_id: str = pydantic.Field(description="Project id to set the credentials") - private_key_id: str = pydantic.Field(description="Private key id") - private_key: str = pydantic.Field( - 
description="Private key in a form of '-----BEGIN PRIVATE KEY-----\\nprivate-key\\n-----END PRIVATE KEY-----\\n'" - ) - client_email: str = pydantic.Field(description="Client email") - client_id: str = pydantic.Field(description="Client Id") - auth_uri: str = pydantic.Field( - default="https://accounts.google.com/o/oauth2/auth", - description="Authentication uri", - ) - token_uri: str = pydantic.Field( - default="https://oauth2.googleapis.com/token", description="Token uri" - ) - auth_provider_x509_cert_url: str = pydantic.Field( - default="https://www.googleapis.com/oauth2/v1/certs", - description="Auth provider x509 certificate url", - ) - type: str = pydantic.Field( - default="service_account", description="Authentication type" - ) - client_x509_cert_url: Optional[str] = pydantic.Field( - default=None, - description="If not set it will be default to https://www.googleapis.com/robot/v1/metadata/x509/client_email", - ) - - _fix_private_key_newlines = pydantic_multiline_string("private_key") - - @pydantic.root_validator(skip_on_failure=True) - def validate_config(cls, values: Dict[str, Any]) -> Dict[str, Any]: - if values.get("client_x509_cert_url") is None: - values[ - "client_x509_cert_url" - ] = f'https://www.googleapis.com/robot/v1/metadata/x509/{values["client_email"]}' - return values - - def create_credential_temp_file(self) -> str: - with tempfile.NamedTemporaryFile(delete=False) as fp: - cred_json = json.dumps(self.dict(), indent=4, separators=(",", ": ")) - fp.write(cred_json.encode()) - return fp.name - - -class BigQueryUsageConfig(BigQueryBaseConfig, EnvConfigMixin, BaseUsageConfig): - projects: Optional[List[str]] = pydantic.Field( - default=None, - description="List of project ids to ingest usage from. If not specified, will infer from environment.", - ) - project_id: Optional[str] = pydantic.Field( - default=None, - description="Project ID to ingest usage from. If not specified, will infer from environment. 
Deprecated in favour of projects ", - ) - extra_client_options: dict = pydantic.Field( - default_factory=dict, - description="Additional options to pass to google.cloud.logging_v2.client.Client.", - ) - use_v2_audit_metadata: Optional[bool] = pydantic.Field( - default=False, - description="Whether to ingest logs using the v2 format. Required if use_exported_bigquery_audit_metadata is set to True.", - ) - - bigquery_audit_metadata_datasets: Optional[List[str]] = pydantic.Field( - description="A list of datasets that contain a table named cloudaudit_googleapis_com_data_access which contain BigQuery audit logs, specifically, those containing BigQueryAuditMetadata. It is recommended that the project of the dataset is also specified, for example, projectA.datasetB.", - ) - use_exported_bigquery_audit_metadata: bool = pydantic.Field( - default=False, - description="When configured, use BigQueryAuditMetadata in bigquery_audit_metadata_datasets to compute usage information.", - ) - - use_date_sharded_audit_log_tables: bool = pydantic.Field( - default=False, - description="Whether to read date sharded tables or time partitioned tables when extracting usage from exported audit logs.", - ) - - table_pattern: AllowDenyPattern = pydantic.Field( - default=AllowDenyPattern.allow_all(), - description="List of regex patterns for tables to include/exclude from ingestion.", - ) - dataset_pattern: AllowDenyPattern = pydantic.Field( - default=AllowDenyPattern.allow_all(), - description="List of regex patterns for datasets to include/exclude from ingestion.", - ) - log_page_size: pydantic.PositiveInt = pydantic.Field( - default=1000, - description="", - ) - - query_log_delay: Optional[pydantic.PositiveInt] = pydantic.Field( - default=None, - description="To account for the possibility that the query event arrives after the read event in the audit logs, we wait for at least query_log_delay additional events to be processed before attempting to resolve BigQuery job information from the 
logs. If query_log_delay is None, it gets treated as an unlimited delay, which prioritizes correctness at the expense of memory usage.", - ) - - max_query_duration: timedelta = pydantic.Field( - default=timedelta(minutes=15), - description="Correction to pad start_time and end_time with. For handling the case where the read happens within our time range but the query completion event is delayed and happens after the configured end time.", - ) - - credential: Optional[BigQueryCredential] = pydantic.Field( - default=None, - description="Bigquery credential. Required if GOOGLE_APPLICATION_CREDENTIALS environment variable is not set. See this example recipe for details", - ) - _credentials_path: Optional[str] = pydantic.PrivateAttr(None) - temp_table_dataset_prefix: str = pydantic.Field( - default="_", - description="If you are creating temp tables in a dataset with a particular prefix you can use this config to set the prefix for the dataset. This is to support workflows from before bigquery's introduction of temp tables. By default we use `_` because of datasets that begin with an underscore are hidden by default https://cloud.google.com/bigquery/docs/datasets#dataset-naming.", - ) - - def __init__(self, **data: Any): - super().__init__(**data) - if self.credential: - self._credentials_path = self.credential.create_credential_temp_file() - logger.debug( - f"Creating temporary credential file at {self._credentials_path}" - ) - os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self._credentials_path - - @pydantic.validator("project_id") - def note_project_id_deprecation(cls, v, values, **kwargs): - logger.warning( - "bigquery-usage project_id option is deprecated; use projects instead" - ) - values["projects"] = [v] - return None - - # BigQuery project-ids are globally unique. 
- platform_instance_not_supported_for_bigquery = pydantic_removed_field( - "platform_instance" - ) - - @pydantic.validator("use_exported_bigquery_audit_metadata") - def use_exported_bigquery_audit_metadata_uses_v2(cls, v, values): - if v is True and not values["use_v2_audit_metadata"]: - raise ValueError( - "To use exported BigQuery audit metadata, you must also use v2 audit metadata" - ) - return v - - def get_allow_pattern_string(self) -> str: - return "|".join(self.table_pattern.allow) if self.table_pattern else "" - - def get_deny_pattern_string(self) -> str: - return "|".join(self.table_pattern.deny) if self.table_pattern else "" diff --git a/metadata-ingestion/src/datahub/ingestion/source_config/usage/snowflake_usage.py b/metadata-ingestion/src/datahub/ingestion/source_config/usage/snowflake_usage.py deleted file mode 100644 index a0e79f62240ee3..00000000000000 --- a/metadata-ingestion/src/datahub/ingestion/source_config/usage/snowflake_usage.py +++ /dev/null @@ -1,19 +0,0 @@ -import logging -from typing import Optional - -import pydantic - -from datahub.ingestion.source.usage.usage_common import BaseUsageConfig - -logger = logging.getLogger(__name__) - - -class SnowflakeUsageConfig(BaseUsageConfig): - email_domain: Optional[str] = pydantic.Field( - default=None, - description="Email domain of your organization so users can be displayed on UI appropriately.", - ) - apply_view_usage_to_tables: bool = pydantic.Field( - default=False, - description="Whether to apply view's usage to its base tables. 
If set to True, usage is applied to base tables only.", - ) diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_dataproduct.py b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_dataproduct.py index 45e92628430258..c474e423030e05 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_dataproduct.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_dataproduct.py @@ -11,7 +11,7 @@ from datahub.ingestion.transformer.dataset_transformer import ( DatasetDataproductTransformer, ) -from datahub.metadata.schema_classes import MetadataChangeProposalClass +from datahub.metadata.schema_classes import ContainerClass, MetadataChangeProposalClass from datahub.specific.dataproduct import DataProductPatchBuilder logger = logging.getLogger(__name__) @@ -23,6 +23,8 @@ class AddDatasetDataProductConfig(ConfigModel): _resolve_data_product_fn = pydantic_resolve_key("get_data_product_to_add") + is_container: bool = False + class AddDatasetDataProduct(DatasetDataproductTransformer): """Transformer that adds dataproduct entity for provided dataset as its asset according to a callback function.""" @@ -49,10 +51,11 @@ def handle_end_of_stream( self, ) -> List[Union[MetadataChangeProposalWrapper, MetadataChangeProposalClass]]: data_products: Dict[str, DataProductPatchBuilder] = {} - + data_products_container: Dict[str, DataProductPatchBuilder] = {} logger.debug("Generating dataproducts") for entity_urn in self.entity_map.keys(): data_product_urn = self.config.get_data_product_to_add(entity_urn) + is_container = self.config.is_container if data_product_urn: if data_product_urn not in data_products: data_products[data_product_urn] = DataProductPatchBuilder( @@ -63,11 +66,34 @@ def handle_end_of_stream( data_product_urn ].add_asset(entity_urn) + if is_container: + assert self.ctx.graph + container_aspect = self.ctx.graph.get_aspect( + entity_urn, aspect_type=ContainerClass + ) + if not container_aspect: + 
continue + container_urn = container_aspect.container + if data_product_urn not in data_products_container: + container_product = DataProductPatchBuilder( + data_product_urn + ).add_asset(container_urn) + data_products_container[data_product_urn] = container_product + else: + data_products_container[ + data_product_urn + ] = data_products_container[data_product_urn].add_asset( + container_urn + ) + mcps: List[ Union[MetadataChangeProposalWrapper, MetadataChangeProposalClass] ] = [] for data_product in data_products.values(): mcps.extend(list(data_product.build())) + if is_container: + for data_product in data_products_container.values(): + mcps.extend(list(data_product.build())) return mcps @@ -97,6 +123,7 @@ def create( class PatternDatasetDataProductConfig(ConfigModel): dataset_to_data_product_urns_pattern: KeyValuePattern = KeyValuePattern.all() + is_container: bool = False @pydantic.root_validator(pre=True) def validate_pattern_value(cls, values: Dict) -> Dict: @@ -122,6 +149,7 @@ def __init__(self, config: PatternDatasetDataProductConfig, ctx: PipelineContext )[0] if dataset_to_data_product.value(dataset_urn) else None, + is_container=config.is_container, ) super().__init__(generic_config, ctx) diff --git a/metadata-ingestion/src/datahub/integrations/great_expectations/action.py b/metadata-ingestion/src/datahub/integrations/great_expectations/action.py index 94501b0d499b75..cdc8c8268b4883 100644 --- a/metadata-ingestion/src/datahub/integrations/great_expectations/action.py +++ b/metadata-ingestion/src/datahub/integrations/great_expectations/action.py @@ -1,867 +1,3 @@ -from datahub.utilities._markupsafe_compat import MARKUPSAFE_PATCHED +from datahub_gx_plugin.action import DataHubValidationAction -import json -import logging -import sys -import time -from dataclasses import dataclass -from datetime import timezone -from decimal import Decimal -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union - -from great_expectations.checkpoint.actions 
import ValidationAction -from great_expectations.core.batch import Batch -from great_expectations.core.batch_spec import ( - RuntimeDataBatchSpec, - RuntimeQueryBatchSpec, - SqlAlchemyDatasourceBatchSpec, -) -from great_expectations.core.expectation_validation_result import ( - ExpectationSuiteValidationResult, -) -from great_expectations.data_asset.data_asset import DataAsset -from great_expectations.data_context.data_context import DataContext -from great_expectations.data_context.types.resource_identifiers import ( - ExpectationSuiteIdentifier, - ValidationResultIdentifier, -) -from great_expectations.execution_engine import PandasExecutionEngine -from great_expectations.execution_engine.sqlalchemy_execution_engine import ( - SqlAlchemyExecutionEngine, -) -from great_expectations.validator.validator import Validator -from sqlalchemy.engine.base import Connection, Engine -from sqlalchemy.engine.url import make_url - -import datahub.emitter.mce_builder as builder -from datahub.cli.env_utils import get_boolean_env_variable -from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.emitter.rest_emitter import DatahubRestEmitter -from datahub.emitter.serialization_helper import pre_json_transform -from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import ( - get_platform_from_sqlalchemy_uri, -) -from datahub.metadata.com.linkedin.pegasus2avro.assertion import ( - AssertionInfo, - AssertionResult, - AssertionResultType, - AssertionRunEvent, - AssertionRunStatus, - AssertionStdAggregation, - AssertionStdOperator, - AssertionStdParameter, - AssertionStdParameters, - AssertionStdParameterType, - AssertionType, - BatchSpec, - DatasetAssertionInfo, - DatasetAssertionScope, -) -from datahub.metadata.com.linkedin.pegasus2avro.common import DataPlatformInstance -from datahub.metadata.schema_classes import PartitionSpecClass, PartitionTypeClass -from datahub.utilities.sql_parser import DefaultSQLParser - -if TYPE_CHECKING: - from 
great_expectations.data_context.types.resource_identifiers import ( - GXCloudIdentifier, - ) - -assert MARKUPSAFE_PATCHED -logger = logging.getLogger(__name__) -if get_boolean_env_variable("DATAHUB_DEBUG", False): - handler = logging.StreamHandler(stream=sys.stdout) - logger.addHandler(handler) - logger.setLevel(logging.DEBUG) - -GE_PLATFORM_NAME = "great-expectations" - - -class DataHubValidationAction(ValidationAction): - def __init__( - self, - data_context: DataContext, - server_url: str, - env: str = builder.DEFAULT_ENV, - platform_alias: Optional[str] = None, - platform_instance_map: Optional[Dict[str, str]] = None, - graceful_exceptions: bool = True, - token: Optional[str] = None, - timeout_sec: Optional[float] = None, - retry_status_codes: Optional[List[int]] = None, - retry_max_times: Optional[int] = None, - extra_headers: Optional[Dict[str, str]] = None, - exclude_dbname: Optional[bool] = None, - parse_table_names_from_sql: bool = False, - convert_urns_to_lowercase: bool = False, - ): - super().__init__(data_context) - self.server_url = server_url - self.env = env - self.platform_alias = platform_alias - self.platform_instance_map = platform_instance_map - self.graceful_exceptions = graceful_exceptions - self.token = token - self.timeout_sec = timeout_sec - self.retry_status_codes = retry_status_codes - self.retry_max_times = retry_max_times - self.extra_headers = extra_headers - self.exclude_dbname = exclude_dbname - self.parse_table_names_from_sql = parse_table_names_from_sql - self.convert_urns_to_lowercase = convert_urns_to_lowercase - - def _run( - self, - validation_result_suite: ExpectationSuiteValidationResult, - validation_result_suite_identifier: Union[ - ValidationResultIdentifier, "GXCloudIdentifier" - ], - data_asset: Union[Validator, DataAsset, Batch], - payload: Optional[Any] = None, - expectation_suite_identifier: Optional[ExpectationSuiteIdentifier] = None, - checkpoint_identifier: Optional[Any] = None, - ) -> Dict: - datasets = [] - try: 
- emitter = DatahubRestEmitter( - gms_server=self.server_url, - token=self.token, - read_timeout_sec=self.timeout_sec, - connect_timeout_sec=self.timeout_sec, - retry_status_codes=self.retry_status_codes, - retry_max_times=self.retry_max_times, - extra_headers=self.extra_headers, - ) - - expectation_suite_name = validation_result_suite.meta.get( - "expectation_suite_name" - ) - run_id = validation_result_suite.meta.get("run_id") - if hasattr(data_asset, "active_batch_id"): - batch_identifier = data_asset.active_batch_id - else: - batch_identifier = data_asset.batch_id - - if isinstance( - validation_result_suite_identifier, ValidationResultIdentifier - ): - expectation_suite_name = ( - validation_result_suite_identifier.expectation_suite_identifier.expectation_suite_name - ) - run_id = validation_result_suite_identifier.run_id - batch_identifier = validation_result_suite_identifier.batch_identifier - - # Returns datasets and corresponding batch requests - datasets = self.get_dataset_partitions(batch_identifier, data_asset) - - if len(datasets) == 0 or datasets[0]["dataset_urn"] is None: - warn("Metadata not sent to datahub. No datasets found.") - return {"datahub_notification_result": "none required"} - - # Returns assertion info and assertion results - assertions = self.get_assertions_with_results( - validation_result_suite, - expectation_suite_name, - run_id, - payload, - datasets, - ) - - logger.info("Sending metadata to datahub ...") - logger.info("Dataset URN - {urn}".format(urn=datasets[0]["dataset_urn"])) - - for assertion in assertions: - logger.info( - "Assertion URN - {urn}".format(urn=assertion["assertionUrn"]) - ) - - # Construct a MetadataChangeProposalWrapper object. - assertion_info_mcp = MetadataChangeProposalWrapper( - entityUrn=assertion["assertionUrn"], - aspect=assertion["assertionInfo"], - ) - emitter.emit_mcp(assertion_info_mcp) - - # Construct a MetadataChangeProposalWrapper object. 
- assertion_platform_mcp = MetadataChangeProposalWrapper( - entityUrn=assertion["assertionUrn"], - aspect=assertion["assertionPlatform"], - ) - emitter.emit_mcp(assertion_platform_mcp) - - for assertionResult in assertion["assertionResults"]: - dataset_assertionResult_mcp = MetadataChangeProposalWrapper( - entityUrn=assertionResult.assertionUrn, - aspect=assertionResult, - ) - - # Emit Result! (timeseries aspect) - emitter.emit_mcp(dataset_assertionResult_mcp) - logger.info("Metadata sent to datahub.") - result = "DataHub notification succeeded" - except Exception as e: - result = "DataHub notification failed" - if self.graceful_exceptions: - logger.error(e) - logger.info("Suppressing error because graceful_exceptions is set") - else: - raise - - return {"datahub_notification_result": result} - - def get_assertions_with_results( - self, - validation_result_suite, - expectation_suite_name, - run_id, - payload, - datasets, - ): - dataPlatformInstance = DataPlatformInstance( - platform=builder.make_data_platform_urn(GE_PLATFORM_NAME) - ) - docs_link = None - if payload: - # process the payload - for action_names in payload.keys(): - if payload[action_names]["class"] == "UpdateDataDocsAction": - data_docs_pages = payload[action_names] - for docs_link_key, docs_link_val in data_docs_pages.items(): - if "file://" not in docs_link_val and docs_link_key != "class": - docs_link = docs_link_val - - assertions_with_results = [] - for result in validation_result_suite.results: - expectation_config = result["expectation_config"] - expectation_type = expectation_config["expectation_type"] - success = bool(result["success"]) - kwargs = { - k: v for k, v in expectation_config["kwargs"].items() if k != "batch_id" - } - - result = result["result"] - assertion_datasets = [d["dataset_urn"] for d in datasets] - if len(datasets) == 1 and "column" in kwargs: - assertion_fields = [ - builder.make_schema_field_urn( - datasets[0]["dataset_urn"], kwargs["column"] - ) - ] - else: - 
assertion_fields = None # type:ignore - - # Be careful what fields to consider for creating assertion urn. - # Any change in fields below would lead to a new assertion - # FIXME - Currently, when using evaluation parameters, new assertion is - # created when runtime resolved kwargs are different, - # possibly for each validation run - assertionUrn = builder.make_assertion_urn( - builder.datahub_guid( - pre_json_transform( - { - "platform": GE_PLATFORM_NAME, - "nativeType": expectation_type, - "nativeParameters": kwargs, - "dataset": assertion_datasets[0], - "fields": assertion_fields, - } - ) - ) - ) - logger.debug( - "GE expectation_suite_name - {name}, expectation_type - {type}, Assertion URN - {urn}".format( - name=expectation_suite_name, type=expectation_type, urn=assertionUrn - ) - ) - assertionInfo: AssertionInfo = self.get_assertion_info( - expectation_type, - kwargs, - assertion_datasets[0], - assertion_fields, - expectation_suite_name, - ) - - # TODO: Understand why their run time is incorrect. 
- run_time = run_id.run_time.astimezone(timezone.utc) - evaluation_parameters = ( - { - k: convert_to_string(v) - for k, v in validation_result_suite.evaluation_parameters.items() - if k and v - } - if validation_result_suite.evaluation_parameters - else None - ) - - nativeResults = { - k: convert_to_string(v) - for k, v in result.items() - if ( - k - in [ - "observed_value", - "partial_unexpected_list", - "partial_unexpected_counts", - "details", - ] - and v - ) - } - - actualAggValue = ( - result.get("observed_value") - if isinstance(result.get("observed_value"), (int, float)) - else None - ) - - ds = datasets[0] - # https://docs.greatexpectations.io/docs/reference/expectations/result_format/ - assertionResult = AssertionRunEvent( - timestampMillis=int(round(time.time() * 1000)), - assertionUrn=assertionUrn, - asserteeUrn=ds["dataset_urn"], - runId=run_time.strftime("%Y-%m-%dT%H:%M:%SZ"), - result=AssertionResult( - type=AssertionResultType.SUCCESS - if success - else AssertionResultType.FAILURE, - rowCount=parse_int_or_default(result.get("element_count")), - missingCount=parse_int_or_default(result.get("missing_count")), - unexpectedCount=parse_int_or_default( - result.get("unexpected_count") - ), - actualAggValue=actualAggValue, - externalUrl=docs_link, - nativeResults=nativeResults, - ), - batchSpec=ds["batchSpec"], - status=AssertionRunStatus.COMPLETE, - runtimeContext=evaluation_parameters, - ) - if ds.get("partitionSpec") is not None: - assertionResult.partitionSpec = ds.get("partitionSpec") - assertionResults = [assertionResult] - assertions_with_results.append( - { - "assertionUrn": assertionUrn, - "assertionInfo": assertionInfo, - "assertionPlatform": dataPlatformInstance, - "assertionResults": assertionResults, - } - ) - return assertions_with_results - - def get_assertion_info( - self, expectation_type, kwargs, dataset, fields, expectation_suite_name - ): - # TODO - can we find exact type of min and max value - def get_min_max(kwargs, 
type=AssertionStdParameterType.UNKNOWN): - return AssertionStdParameters( - minValue=AssertionStdParameter( - value=convert_to_string(kwargs.get("min_value")), - type=type, - ), - maxValue=AssertionStdParameter( - value=convert_to_string(kwargs.get("max_value")), - type=type, - ), - ) - - known_expectations: Dict[str, DataHubStdAssertion] = { - # column aggregate expectations - "expect_column_min_to_be_between": DataHubStdAssertion( - scope=DatasetAssertionScope.DATASET_COLUMN, - operator=AssertionStdOperator.BETWEEN, - aggregation=AssertionStdAggregation.MIN, - parameters=get_min_max(kwargs), - ), - "expect_column_max_to_be_between": DataHubStdAssertion( - scope=DatasetAssertionScope.DATASET_COLUMN, - operator=AssertionStdOperator.BETWEEN, - aggregation=AssertionStdAggregation.MAX, - parameters=get_min_max(kwargs), - ), - "expect_column_median_to_be_between": DataHubStdAssertion( - scope=DatasetAssertionScope.DATASET_COLUMN, - operator=AssertionStdOperator.BETWEEN, - aggregation=AssertionStdAggregation.MEDIAN, - parameters=get_min_max(kwargs), - ), - "expect_column_stdev_to_be_between": DataHubStdAssertion( - scope=DatasetAssertionScope.DATASET_COLUMN, - operator=AssertionStdOperator.BETWEEN, - aggregation=AssertionStdAggregation.STDDEV, - parameters=get_min_max(kwargs, AssertionStdParameterType.NUMBER), - ), - "expect_column_mean_to_be_between": DataHubStdAssertion( - scope=DatasetAssertionScope.DATASET_COLUMN, - operator=AssertionStdOperator.BETWEEN, - aggregation=AssertionStdAggregation.MEAN, - parameters=get_min_max(kwargs, AssertionStdParameterType.NUMBER), - ), - "expect_column_unique_value_count_to_be_between": DataHubStdAssertion( - scope=DatasetAssertionScope.DATASET_COLUMN, - operator=AssertionStdOperator.BETWEEN, - aggregation=AssertionStdAggregation.UNIQUE_COUNT, - parameters=get_min_max(kwargs, AssertionStdParameterType.NUMBER), - ), - "expect_column_proportion_of_unique_values_to_be_between": DataHubStdAssertion( - 
scope=DatasetAssertionScope.DATASET_COLUMN, - operator=AssertionStdOperator.BETWEEN, - aggregation=AssertionStdAggregation.UNIQUE_PROPOTION, - parameters=get_min_max(kwargs, AssertionStdParameterType.NUMBER), - ), - "expect_column_sum_to_be_between": DataHubStdAssertion( - scope=DatasetAssertionScope.DATASET_COLUMN, - operator=AssertionStdOperator.BETWEEN, - aggregation=AssertionStdAggregation.SUM, - parameters=get_min_max(kwargs, AssertionStdParameterType.NUMBER), - ), - "expect_column_quantile_values_to_be_between": DataHubStdAssertion( - scope=DatasetAssertionScope.DATASET_COLUMN, - operator=AssertionStdOperator.BETWEEN, - aggregation=AssertionStdAggregation._NATIVE_, - ), - # column map expectations - "expect_column_values_to_not_be_null": DataHubStdAssertion( - scope=DatasetAssertionScope.DATASET_COLUMN, - operator=AssertionStdOperator.NOT_NULL, - aggregation=AssertionStdAggregation.IDENTITY, - ), - "expect_column_values_to_be_in_set": DataHubStdAssertion( - scope=DatasetAssertionScope.DATASET_COLUMN, - operator=AssertionStdOperator.IN, - aggregation=AssertionStdAggregation.IDENTITY, - parameters=AssertionStdParameters( - value=AssertionStdParameter( - value=convert_to_string(kwargs.get("value_set")), - type=AssertionStdParameterType.SET, - ) - ), - ), - "expect_column_values_to_be_between": DataHubStdAssertion( - scope=DatasetAssertionScope.DATASET_COLUMN, - operator=AssertionStdOperator.BETWEEN, - aggregation=AssertionStdAggregation.IDENTITY, - parameters=get_min_max(kwargs), - ), - "expect_column_values_to_match_regex": DataHubStdAssertion( - scope=DatasetAssertionScope.DATASET_COLUMN, - operator=AssertionStdOperator.REGEX_MATCH, - aggregation=AssertionStdAggregation.IDENTITY, - parameters=AssertionStdParameters( - value=AssertionStdParameter( - value=kwargs.get("regex"), - type=AssertionStdParameterType.STRING, - ) - ), - ), - "expect_column_values_to_match_regex_list": DataHubStdAssertion( - scope=DatasetAssertionScope.DATASET_COLUMN, - 
operator=AssertionStdOperator.REGEX_MATCH, - aggregation=AssertionStdAggregation.IDENTITY, - parameters=AssertionStdParameters( - value=AssertionStdParameter( - value=convert_to_string(kwargs.get("regex_list")), - type=AssertionStdParameterType.LIST, - ) - ), - ), - "expect_table_columns_to_match_ordered_list": DataHubStdAssertion( - scope=DatasetAssertionScope.DATASET_SCHEMA, - operator=AssertionStdOperator.EQUAL_TO, - aggregation=AssertionStdAggregation.COLUMNS, - parameters=AssertionStdParameters( - value=AssertionStdParameter( - value=convert_to_string(kwargs.get("column_list")), - type=AssertionStdParameterType.LIST, - ) - ), - ), - "expect_table_columns_to_match_set": DataHubStdAssertion( - scope=DatasetAssertionScope.DATASET_SCHEMA, - operator=AssertionStdOperator.EQUAL_TO, - aggregation=AssertionStdAggregation.COLUMNS, - parameters=AssertionStdParameters( - value=AssertionStdParameter( - value=convert_to_string(kwargs.get("column_set")), - type=AssertionStdParameterType.SET, - ) - ), - ), - "expect_table_column_count_to_be_between": DataHubStdAssertion( - scope=DatasetAssertionScope.DATASET_SCHEMA, - operator=AssertionStdOperator.BETWEEN, - aggregation=AssertionStdAggregation.COLUMN_COUNT, - parameters=get_min_max(kwargs, AssertionStdParameterType.NUMBER), - ), - "expect_table_column_count_to_equal": DataHubStdAssertion( - scope=DatasetAssertionScope.DATASET_SCHEMA, - operator=AssertionStdOperator.EQUAL_TO, - aggregation=AssertionStdAggregation.COLUMN_COUNT, - parameters=AssertionStdParameters( - value=AssertionStdParameter( - value=convert_to_string(kwargs.get("value")), - type=AssertionStdParameterType.NUMBER, - ) - ), - ), - "expect_column_to_exist": DataHubStdAssertion( - scope=DatasetAssertionScope.DATASET_SCHEMA, - operator=AssertionStdOperator._NATIVE_, - aggregation=AssertionStdAggregation._NATIVE_, - ), - "expect_table_row_count_to_equal": DataHubStdAssertion( - scope=DatasetAssertionScope.DATASET_ROWS, - operator=AssertionStdOperator.EQUAL_TO, - 
aggregation=AssertionStdAggregation.ROW_COUNT, - parameters=AssertionStdParameters( - value=AssertionStdParameter( - value=convert_to_string(kwargs.get("value")), - type=AssertionStdParameterType.NUMBER, - ) - ), - ), - "expect_table_row_count_to_be_between": DataHubStdAssertion( - scope=DatasetAssertionScope.DATASET_ROWS, - operator=AssertionStdOperator.BETWEEN, - aggregation=AssertionStdAggregation.ROW_COUNT, - parameters=get_min_max(kwargs, AssertionStdParameterType.NUMBER), - ), - } - - datasetAssertionInfo = DatasetAssertionInfo( - dataset=dataset, - fields=fields, - operator=AssertionStdOperator._NATIVE_, - aggregation=AssertionStdAggregation._NATIVE_, - nativeType=expectation_type, - nativeParameters={k: convert_to_string(v) for k, v in kwargs.items()}, - scope=DatasetAssertionScope.DATASET_ROWS, - ) - - if expectation_type in known_expectations.keys(): - assertion = known_expectations[expectation_type] - datasetAssertionInfo.scope = assertion.scope - datasetAssertionInfo.aggregation = assertion.aggregation - datasetAssertionInfo.operator = assertion.operator - datasetAssertionInfo.parameters = assertion.parameters - - # Heuristically mapping other expectations - else: - if "column" in kwargs and expectation_type.startswith( - "expect_column_value" - ): - datasetAssertionInfo.scope = DatasetAssertionScope.DATASET_COLUMN - datasetAssertionInfo.aggregation = AssertionStdAggregation.IDENTITY - elif "column" in kwargs: - datasetAssertionInfo.scope = DatasetAssertionScope.DATASET_COLUMN - datasetAssertionInfo.aggregation = AssertionStdAggregation._NATIVE_ - - return AssertionInfo( - type=AssertionType.DATASET, - datasetAssertion=datasetAssertionInfo, - customProperties={"expectation_suite_name": expectation_suite_name}, - ) - - def get_dataset_partitions(self, batch_identifier, data_asset): - dataset_partitions = [] - - logger.debug("Finding datasets being validated") - - # for now, we support only v3-api and sqlalchemy execution engine and Pandas engine - 
is_sql_alchemy = isinstance(data_asset, Validator) and ( - isinstance(data_asset.execution_engine, SqlAlchemyExecutionEngine) - ) - is_pandas = isinstance(data_asset.execution_engine, PandasExecutionEngine) - if is_sql_alchemy or is_pandas: - ge_batch_spec = data_asset.active_batch_spec - partitionSpec = None - batchSpecProperties = { - "data_asset_name": str( - data_asset.active_batch_definition.data_asset_name - ), - "datasource_name": str( - data_asset.active_batch_definition.datasource_name - ), - } - sqlalchemy_uri = None - if is_sql_alchemy and isinstance( - data_asset.execution_engine.engine, Engine - ): - sqlalchemy_uri = data_asset.execution_engine.engine.url - # For snowflake sqlalchemy_execution_engine.engine is actually instance of Connection - elif is_sql_alchemy and isinstance( - data_asset.execution_engine.engine, Connection - ): - sqlalchemy_uri = data_asset.execution_engine.engine.engine.url - - if isinstance(ge_batch_spec, SqlAlchemyDatasourceBatchSpec): - # e.g. ConfiguredAssetSqlDataConnector with splitter_method or sampling_method - schema_name = ge_batch_spec.get("schema_name") - table_name = ge_batch_spec.get("table_name") - - dataset_urn = make_dataset_urn_from_sqlalchemy_uri( - sqlalchemy_uri, - schema_name, - table_name, - self.env, - self.get_platform_instance( - data_asset.active_batch_definition.datasource_name - ), - self.exclude_dbname, - self.platform_alias, - self.convert_urns_to_lowercase, - ) - batchSpec = BatchSpec( - nativeBatchId=batch_identifier, - customProperties=batchSpecProperties, - ) - - splitter_method = ge_batch_spec.get("splitter_method") - if ( - splitter_method is not None - and splitter_method != "_split_on_whole_table" - ): - batch_identifiers = ge_batch_spec.get("batch_identifiers", {}) - partitionSpec = PartitionSpecClass( - partition=convert_to_string(batch_identifiers) - ) - sampling_method = ge_batch_spec.get("sampling_method", "") - if sampling_method == "_sample_using_limit": - batchSpec.limit = 
ge_batch_spec["sampling_kwargs"]["n"] - - dataset_partitions.append( - { - "dataset_urn": dataset_urn, - "partitionSpec": partitionSpec, - "batchSpec": batchSpec, - } - ) - elif isinstance(ge_batch_spec, RuntimeQueryBatchSpec): - if not self.parse_table_names_from_sql: - warn( - "Enable parse_table_names_from_sql in DatahubValidationAction config\ - to try to parse the tables being asserted from SQL query" - ) - return [] - query = data_asset.batches[ - batch_identifier - ].batch_request.runtime_parameters["query"] - partitionSpec = PartitionSpecClass( - type=PartitionTypeClass.QUERY, - partition=f"Query_{builder.datahub_guid(pre_json_transform(query))}", - ) - - batchSpec = BatchSpec( - nativeBatchId=batch_identifier, - query=query, - customProperties=batchSpecProperties, - ) - try: - tables = DefaultSQLParser(query).get_tables() - except Exception as e: - logger.warning(f"Sql parser failed on {query} with {e}") - tables = [] - - if len(set(tables)) != 1: - warn( - "DataHubValidationAction does not support cross dataset assertions." 
- ) - return [] - for table in tables: - dataset_urn = make_dataset_urn_from_sqlalchemy_uri( - sqlalchemy_uri, - None, - table, - self.env, - self.get_platform_instance( - data_asset.active_batch_definition.datasource_name - ), - self.exclude_dbname, - self.platform_alias, - self.convert_urns_to_lowercase, - ) - dataset_partitions.append( - { - "dataset_urn": dataset_urn, - "partitionSpec": partitionSpec, - "batchSpec": batchSpec, - } - ) - elif isinstance(ge_batch_spec, RuntimeDataBatchSpec): - data_platform = self.get_platform_instance( - data_asset.active_batch_definition.datasource_name - ) - dataset_urn = builder.make_dataset_urn_with_platform_instance( - platform=data_platform - if self.platform_alias is None - else self.platform_alias, - name=data_asset.active_batch_definition.datasource_name, - platform_instance="", - env=self.env, - ) - batchSpec = BatchSpec( - nativeBatchId=batch_identifier, - query="", - customProperties=batchSpecProperties, - ) - dataset_partitions.append( - { - "dataset_urn": dataset_urn, - "partitionSpec": partitionSpec, - "batchSpec": batchSpec, - } - ) - else: - warn( - "DataHubValidationAction does not recognize this GE batch spec type- {batch_spec_type}.".format( - batch_spec_type=type(ge_batch_spec) - ) - ) - else: - # TODO - v2-spec - SqlAlchemyDataset support - warn( - "DataHubValidationAction does not recognize this GE data asset type - {asset_type}. 
This is either using v2-api or execution engine other than sqlalchemy.".format( - asset_type=type(data_asset) - ) - ) - - return dataset_partitions - - def get_platform_instance(self, datasource_name): - if self.platform_instance_map and datasource_name in self.platform_instance_map: - return self.platform_instance_map[datasource_name] - else: - warn( - f"Datasource {datasource_name} is not present in platform_instance_map" - ) - return None - - -def parse_int_or_default(value, default_value=None): - if value is None: - return default_value - else: - return int(value) - - -def make_dataset_urn_from_sqlalchemy_uri( - sqlalchemy_uri, - schema_name, - table_name, - env, - platform_instance=None, - exclude_dbname=None, - platform_alias=None, - convert_urns_to_lowercase=False, -): - data_platform = get_platform_from_sqlalchemy_uri(str(sqlalchemy_uri)) - url_instance = make_url(sqlalchemy_uri) - - if schema_name is None and "." in table_name: - schema_name, table_name = table_name.split(".")[-2:] - - if data_platform in ["redshift", "postgres"]: - schema_name = schema_name or "public" - if url_instance.database is None: - warn( - f"DataHubValidationAction failed to locate database name for {data_platform}." - ) - return None - schema_name = ( - schema_name if exclude_dbname else f"{url_instance.database}.{schema_name}" - ) - elif data_platform == "mssql": - schema_name = schema_name or "dbo" - if url_instance.database is None: - warn( - f"DataHubValidationAction failed to locate database name for {data_platform}." 
- ) - return None - schema_name = ( - schema_name if exclude_dbname else f"{url_instance.database}.{schema_name}" - ) - elif data_platform in ["trino", "snowflake"]: - if schema_name is None or url_instance.database is None: - warn( - "DataHubValidationAction failed to locate schema name and/or database name for {data_platform}.".format( - data_platform=data_platform - ) - ) - return None - # If data platform is snowflake, we artificially lowercase the Database name. - # This is because DataHub also does this during ingestion. - # Ref: https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py#L155 - database_name = ( - url_instance.database.lower() - if data_platform == "snowflake" - else url_instance.database - ) - if database_name.endswith(f"/{schema_name}"): - database_name = database_name[: -len(f"/{schema_name}")] - schema_name = ( - schema_name if exclude_dbname else f"{database_name}.{schema_name}" - ) - - elif data_platform == "bigquery": - if url_instance.host is None or url_instance.database is None: - warn( - "DataHubValidationAction failed to locate host and/or database name for {data_platform}. ".format( - data_platform=data_platform - ) - ) - return None - schema_name = f"{url_instance.host}.{url_instance.database}" - - schema_name = schema_name or url_instance.database - if schema_name is None: - warn( - f"DataHubValidationAction failed to locate schema name for {data_platform}." 
- ) - return None - - dataset_name = f"{schema_name}.{table_name}" - - if convert_urns_to_lowercase: - dataset_name = dataset_name.lower() - - dataset_urn = builder.make_dataset_urn_with_platform_instance( - platform=data_platform if platform_alias is None else platform_alias, - name=dataset_name, - platform_instance=platform_instance, - env=env, - ) - - return dataset_urn - - -@dataclass -class DataHubStdAssertion: - scope: Union[str, DatasetAssertionScope] - operator: Union[str, AssertionStdOperator] - aggregation: Union[str, AssertionStdAggregation] - parameters: Optional[AssertionStdParameters] = None - - -class DecimalEncoder(json.JSONEncoder): - def default(self, o): - if isinstance(o, Decimal): - return str(o) - return super().default(o) - - -def convert_to_string(var: Any) -> str: - try: - tmp = ( - str(var) - if isinstance(var, (str, int, float)) - else json.dumps(var, cls=DecimalEncoder) - ) - except TypeError as e: - logger.debug(e) - tmp = str(var) - return tmp - - -def warn(msg): - logger.warning(msg) +__all__ = ["DataHubValidationAction"] diff --git a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py index fbf6f954f82bbc..764c2b42537bb4 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py @@ -16,7 +16,7 @@ import datahub.emitter.mce_builder as builder import datahub.metadata.schema_classes as models from datahub.configuration.time_window_config import get_time_bucket -from datahub.emitter.mce_builder import get_sys_time, make_ts_millis +from datahub.emitter.mce_builder import get_sys_time, make_actor_urn, make_ts_millis from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.sql_parsing_builder import compute_upstream_fields from datahub.ingestion.api.closeable import Closeable @@ -25,6 +25,7 @@ from datahub.ingestion.graph.client 
import DataHubGraph from datahub.ingestion.source.usage.usage_common import BaseUsageConfig, UsageAggregator from datahub.metadata.urns import ( + CorpGroupUrn, CorpUserUrn, DataPlatformUrn, DatasetUrn, @@ -83,6 +84,14 @@ class LoggedQuery: default_schema: Optional[str] +@dataclasses.dataclass +class ObservedQuery(LoggedQuery): + query_hash: Optional[str] = None + usage_multiplier: int = 1 + # Use this to store additional key-value information about query for debugging + extra_info: Optional[dict] = None + + @dataclasses.dataclass class ViewDefinition: view_definition: str @@ -101,7 +110,7 @@ class QueryMetadata: query_type: QueryType lineage_type: str # from models.DatasetLineageTypeClass latest_timestamp: Optional[datetime] - actor: Optional[CorpUserUrn] + actor: Optional[Union[CorpUserUrn, CorpGroupUrn]] upstreams: List[UrnStr] # this is direct upstreams, which may be temp tables column_lineage: List[ColumnLineageInfo] @@ -160,7 +169,7 @@ class PreparsedQuery: confidence_score: float = 1.0 query_count: int = 1 - user: Optional[CorpUserUrn] = None + user: Optional[Union[CorpUserUrn, CorpGroupUrn]] = None timestamp: Optional[datetime] = None session_id: str = _MISSING_SESSION_ID query_type: QueryType = QueryType.UNKNOWN @@ -469,7 +478,10 @@ def is_allowed_table(self, urn: UrnStr) -> bool: return self._is_allowed_table(self._name_from_urn(urn)) def add( - self, item: Union[KnownQueryLineageInfo, KnownLineageMapping, PreparsedQuery] + self, + item: Union[ + KnownQueryLineageInfo, KnownLineageMapping, PreparsedQuery, ObservedQuery + ], ) -> None: if isinstance(item, KnownQueryLineageInfo): self.add_known_query_lineage(item) @@ -477,6 +489,17 @@ def add( self.add_known_lineage_mapping(item.upstream_urn, item.downstream_urn) elif isinstance(item, PreparsedQuery): self.add_preparsed_query(item) + elif isinstance(item, ObservedQuery): + self.add_observed_query( + query=item.query, + default_db=item.default_db, + default_schema=item.default_schema, + 
session_id=item.session_id, + usage_multiplier=item.usage_multiplier, + query_timestamp=item.timestamp, + user=make_actor_urn(item.user) if item.user else None, + query_hash=item.query_hash, + ) else: raise ValueError(f"Cannot add unknown item type: {type(item)}") @@ -612,13 +635,14 @@ def add_observed_query( default_db: Optional[str] = None, default_schema: Optional[str] = None, query_timestamp: Optional[datetime] = None, - user: Optional[CorpUserUrn] = None, + user: Optional[Union[CorpUserUrn, CorpGroupUrn]] = None, session_id: Optional[ str ] = None, # can only see temp tables with the same session usage_multiplier: int = 1, is_known_temp_table: bool = False, require_out_table_schema: bool = False, + query_hash: Optional[str] = None, ) -> None: """Add an observed query to the aggregator. @@ -662,8 +686,7 @@ def add_observed_query( if isinstance(parsed.debug_info.column_error, CooperativeTimeoutError): self.report.num_observed_queries_column_timeout += 1 - query_fingerprint = parsed.query_fingerprint - + query_fingerprint = query_hash or parsed.query_fingerprint self.add_preparsed_query( PreparsedQuery( query_id=query_fingerprint, @@ -909,7 +932,7 @@ def _run_sql_parser( schema_resolver: SchemaResolverInterface, session_id: str = _MISSING_SESSION_ID, timestamp: Optional[datetime] = None, - user: Optional[CorpUserUrn] = None, + user: Optional[Union[CorpUserUrn, CorpGroupUrn]] = None, ) -> SqlParsingResult: with self.report.sql_parsing_timer: parsed = sqlglot_lineage( @@ -995,7 +1018,7 @@ def gen_metadata(self) -> Iterable[MetadataChangeProposalWrapper]: yield from self._gen_lineage_mcps(queries_generated) yield from self._gen_remaining_queries(queries_generated) yield from self._gen_usage_statistics_mcps() - yield from self._gen_operation_mcps() + yield from self._gen_operation_mcps(queries_generated) def _gen_lineage_mcps( self, queries_generated: Set[QueryId] @@ -1135,6 +1158,9 @@ def _gen_lineage_for_downstream( upstream_aspect.fineGrainedLineages or None ) + 
if not upstream_aspect.upstreams and not upstream_aspect.fineGrainedLineages: + return + yield MetadataChangeProposalWrapper( entityUrn=downstream_urn, aspect=upstream_aspect, @@ -1432,7 +1458,9 @@ def _gen_usage_statistics_mcps(self) -> Iterable[MetadataChangeProposalWrapper]: # TODO: We should change the usage aggregator to return MCPWs directly. yield cast(MetadataChangeProposalWrapper, wu.metadata) - def _gen_operation_mcps(self) -> Iterable[MetadataChangeProposalWrapper]: + def _gen_operation_mcps( + self, queries_generated: Set[QueryId] + ) -> Iterable[MetadataChangeProposalWrapper]: if not self.generate_operations: return @@ -1440,6 +1468,12 @@ def _gen_operation_mcps(self) -> Iterable[MetadataChangeProposalWrapper]: for query_id in query_ids: yield from self._gen_operation_for_downstream(downstream_urn, query_id) + # Avoid generating the same query twice. + if query_id in queries_generated: + continue + queries_generated.add(query_id) + yield from self._gen_query(self._query_map[query_id], downstream_urn) + def _gen_operation_for_downstream( self, downstream_urn: UrnStr, query_id: QueryId ) -> Iterable[MetadataChangeProposalWrapper]: diff --git a/metadata-ingestion/src/datahub/testing/docker_utils.py b/metadata-ingestion/src/datahub/testing/docker_utils.py new file mode 100644 index 00000000000000..7c1c0304f480e6 --- /dev/null +++ b/metadata-ingestion/src/datahub/testing/docker_utils.py @@ -0,0 +1,70 @@ +import contextlib +import logging +import subprocess +from typing import Callable, Iterator, List, Optional, Union + +import pytest +import pytest_docker.plugin + +logger = logging.getLogger(__name__) + + +def is_responsive(container_name: str, port: int, hostname: Optional[str]) -> bool: + """A cheap way to figure out if a port is responsive on a container""" + if hostname: + cmd = f"docker exec {container_name} /bin/bash -c 'echo -n > /dev/tcp/{hostname}/{port}'" + else: + # use the hostname of the container + cmd = f"docker exec {container_name} 
/bin/bash -c 'c_host=`hostname`;echo -n > /dev/tcp/$c_host/{port}'" + ret = subprocess.run( + cmd, + shell=True, + ) + return ret.returncode == 0 + + +def wait_for_port( + docker_services: pytest_docker.plugin.Services, + container_name: str, + container_port: int, + hostname: Optional[str] = None, + timeout: float = 30.0, + pause: float = 0.5, + checker: Optional[Callable[[], bool]] = None, +) -> None: + try: + docker_services.wait_until_responsive( + timeout=timeout, + pause=pause, + check=( + checker + if checker + else lambda: is_responsive(container_name, container_port, hostname) + ), + ) + logger.info(f"Container {container_name} is ready!") + finally: + # use check=True to raise an error if command gave bad exit code + subprocess.run(f"docker logs {container_name}", shell=True, check=True) + + +@pytest.fixture(scope="module") +def docker_compose_runner( + docker_compose_command, docker_compose_project_name, docker_setup, docker_cleanup +): + @contextlib.contextmanager + def run( + compose_file_path: Union[str, List[str]], key: str, cleanup: bool = True + ) -> Iterator[pytest_docker.plugin.Services]: + with pytest_docker.plugin.get_docker_services( + docker_compose_command=docker_compose_command, + # We can remove the type ignore once this is merged: + # https://github.com/avast/pytest-docker/pull/108 + docker_compose_file=compose_file_path, # type: ignore + docker_compose_project_name=f"{docker_compose_project_name}-{key}", + docker_setup=docker_setup, + docker_cleanup=docker_cleanup if cleanup else [], + ) as docker_services: + yield docker_services + + return run diff --git a/metadata-ingestion/tests/integration/bigquery_v2/bigquery_project_label_mcp_golden.json b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_project_label_mcp_golden.json new file mode 100644 index 00000000000000..a529ddc6221a7a --- /dev/null +++ b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_project_label_mcp_golden.json @@ -0,0 +1,452 @@ +[ +{ + "entityType": 
"container", + "entityUrn": "urn:li:container:f284164f9a7db03ca6bbdb7bb17d5a7e", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "bigquery", + "env": "PROD", + "project_id": "dev" + }, + "name": "dev" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f284164f9a7db03ca6bbdb7bb17d5a7e", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f284164f9a7db03ca6bbdb7bb17d5a7e", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f284164f9a7db03ca6bbdb7bb17d5a7e", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Project" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f284164f9a7db03ca6bbdb7bb17d5a7e", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ce17940c2d64e7e315e68f8d7d071b1e", + "changeType": 
"UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "bigquery", + "env": "PROD", + "project_id": "dev", + "dataset_id": "bigquery-dataset-1" + }, + "externalUrl": "https://console.cloud.google.com/bigquery?project=dev&ws=!1m4!1m3!3m2!1sdev!2sbigquery-dataset-1", + "name": "bigquery-dataset-1" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ce17940c2d64e7e315e68f8d7d071b1e", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ce17940c2d64e7e315e68f8d7d071b1e", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ce17940c2d64e7e315e68f8d7d071b1e", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Dataset" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ce17940c2d64e7e315e68f8d7d071b1e", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f284164f9a7db03ca6bbdb7bb17d5a7e" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + 
} +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ce17940c2d64e7e315e68f8d7d071b1e", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:f284164f9a7db03ca6bbdb7bb17d5a7e", + "urn": "urn:li:container:f284164f9a7db03ca6bbdb7bb17d5a7e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,dev.bigquery-dataset-1.table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,dev.bigquery-dataset-1.table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "dev.bigquery-dataset-1.table-1", + "platform": "urn:li:dataPlatform:bigquery", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "age", + "nullable": false, + "description": "comment", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "INT", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Test Policy Tag" + } + ] + }, + "glossaryTerms": { + "terms": [ + { + "urn": "urn:li:glossaryTerm:Age" + } + ], + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:datahub" + } + }, + "isPartOfKey": false + }, + { + "fieldPath": "email", + "nullable": false, + "description": 
"comment", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [] + }, + "glossaryTerms": { + "terms": [ + { + "urn": "urn:li:glossaryTerm:Email_Address" + } + ], + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:datahub" + } + }, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,dev.bigquery-dataset-1.table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "externalUrl": "https://console.cloud.google.com/bigquery?project=dev&ws=!1m5!1m4!4m3!1sdev!2sbigquery-dataset-1!3stable-1", + "name": "table-1", + "qualifiedName": "dev.bigquery-dataset-1.table-1", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,dev.bigquery-dataset-1.table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ce17940c2d64e7e315e68f8d7d071b1e" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,dev.bigquery-dataset-1.table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:bigquery,dev)" + } + }, + "systemMetadata": { + "lastObserved": 
1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,dev.bigquery-dataset-1.table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,dev.bigquery-dataset-1.table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:f284164f9a7db03ca6bbdb7bb17d5a7e", + "urn": "urn:li:container:f284164f9a7db03ca6bbdb7bb17d5a7e" + }, + { + "id": "urn:li:container:ce17940c2d64e7e315e68f8d7d071b1e", + "urn": "urn:li:container:ce17940c2d64e7e315e68f8d7d071b1e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "glossaryTerm", + "entityUrn": "urn:li:glossaryTerm:Age", + "changeType": "UPSERT", + "aspectName": "glossaryTermKey", + "aspect": { + "json": { + "name": "Age" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "glossaryTerm", + "entityUrn": "urn:li:glossaryTerm:Email_Address", + "changeType": "UPSERT", + "aspectName": "glossaryTermKey", + "aspect": { + "json": { + "name": "Email_Address" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Test Policy Tag", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "Test Policy Tag" + } 
+ }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/bigquery_v2/bigquery_queries_mcps_golden.json b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_queries_mcps_golden.json new file mode 100644 index 00000000000000..2f12848ddfc695 --- /dev/null +++ b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_queries_mcps_golden.json @@ -0,0 +1,5678 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_3.derived_table,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1724050800000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 1724322481569, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_3.base_table_2,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:88c4674d369ef49e881a5ea67ed3485e48f09b9a4924d5282c3ae25004737f95" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:88c4674d369ef49e881a5ea67ed3485e48f09b9a4924d5282c3ae25004737f95", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "\n create or replace table `gcp-staging-2.smoke_test_db_3.derived_table` as (select * from `gcp-staging-2.smoke_test_db_3.base_table_2`);\n ", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549585813, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549585813, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + 
} + }, + "systemMetadata": { + "lastObserved": 1724065178195, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.lineage_from_base,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1724050800000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 1724322460257, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:8f7bb4efb71d494b2bfe115937d6022db0ab9e6ea3d293839a457480e75430e1" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:88c4674d369ef49e881a5ea67ed3485e48f09b9a4924d5282c3ae25004737f95", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724065178196, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:88c4674d369ef49e881a5ea67ed3485e48f09b9a4924d5282c3ae25004737f95", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_3.base_table_2,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_3.derived_table,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178196, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:630f9169072a11dfd8d08a44479f2466acdf2dc2b078b946a739db437b74ad1d", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064498949, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.lineage_from_tmp_table,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1724050800000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 1724322457731, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:composite_0db44e02f671b69df68565346e9d2b68c7166fccf75bd494f34560bfa16c381b" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:composite_0db44e02f671b69df68565346e9d2b68c7166fccf75bd494f34560bfa16c381b", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "create or replace table _smoke_test_db_tmp_tables.tmp_table as (select * from smoke_test_db.base_table);\n\ncreate or replace table smoke_test_db.lineage_from_tmp_table as (select * from _smoke_test_db_tmp_tables.tmp_table)", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549567376, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549567376, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { 
+ "lastObserved": 1724064997222, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:6e250c6966754a5e6532fbb444172dacf5813b0b7afceefbf7772a29878f48f8", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "\n create or replace table `gcp-staging-2.smoke_test_db_4.derived_table_from_external_table` as (select * from `gcp-staging-2.smoke_test_db_4.external_table_us_states`);\n ", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549602653, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549602653, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724065178198, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_external_table,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1724050800000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 1724322505477, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.external_table_us_states,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:6e250c6966754a5e6532fbb444172dacf5813b0b7afceefbf7772a29878f48f8" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:6e250c6966754a5e6532fbb444172dacf5813b0b7afceefbf7772a29878f48f8", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + 
"aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724065178200, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:6e250c6966754a5e6532fbb444172dacf5813b0b7afceefbf7772a29878f48f8", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.external_table_us_states,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_external_table,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178199, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_timetravelled_table,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1724050800000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 1724322508214, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:2fa44cc9d306c7523477fad59ff43e2e580081ee770da69b9b9f66e119b4dcab" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:composite_0db44e02f671b69df68565346e9d2b68c7166fccf75bd494f34560bfa16c381b", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": 
"urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.lineage_from_tmp_table,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724064997223, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.materialized_view_from_table,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1724050800000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 1724322475572, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:e1769381f2d261efecb105f3ab6fc8a2fc6717a1509cc65ba125c03841b0923d" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_sharded_table,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1724050800000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 1724322500148, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.sharded_table1,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:49b267d0cd050c6a45b4d26bcdc6d9ddceb51aa7ed29399c52ef967e8da2b58d" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": 
"bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:composite_0db44e02f671b69df68565346e9d2b68c7166fccf75bd494f34560bfa16c381b", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724064997223, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:630f9169072a11dfd8d08a44479f2466acdf2dc2b078b946a739db437b74ad1d", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "\n create or replace view smoke_test_db.view_from_view_on_table\n as (select * from smoke_test_db.view_from_table)\n ", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549572202, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549572202, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724064498921, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:49b267d0cd050c6a45b4d26bcdc6d9ddceb51aa7ed29399c52ef967e8da2b58d", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "\n create or replace table `gcp-staging-2.smoke_test_db_4.derived_table_from_sharded_table` as (select * from `gcp-staging-2.smoke_test_db_4.sharded_table1_20230101`);\n ", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549598252, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549598252, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724065178202, 
+ "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:630f9169072a11dfd8d08a44479f2466acdf2dc2b078b946a739db437b74ad1d", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724064498922, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.destination_table_of_select_query,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1724050800000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 1724322510656, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:44c19e1fccfa56779f6958f62d3476819b48af701bbee43ccecf5c9e04d63fc4" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:44c19e1fccfa56779f6958f62d3476819b48af701bbee43ccecf5c9e04d63fc4", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "CREATE TABLE `gcp-staging-2.smoke_test_db_4.destination_table_of_select_query` AS\n (\n SELECT * FROM `gcp-staging.smoke_test_db.base_table`\n )", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1724322510656, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1724322510656, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, 
+ "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:44c19e1fccfa56779f6958f62d3476819b48af701bbee43ccecf5c9e04d63fc4", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.destination_table_of_select_query,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:44c19e1fccfa56779f6958f62d3476819b48af701bbee43ccecf5c9e04d63fc4", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:49b267d0cd050c6a45b4d26bcdc6d9ddceb51aa7ed29399c52ef967e8da2b58d", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724065178203, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:49b267d0cd050c6a45b4d26bcdc6d9ddceb51aa7ed29399c52ef967e8da2b58d", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.sharded_table1,PROD)" + }, + { + "entity": 
"urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_sharded_table,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178203, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:630f9169072a11dfd8d08a44479f2466acdf2dc2b078b946a739db437b74ad1d", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_table,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_view_on_table,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724064498922, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:b9c1cbdddc1018284bdfb113865f5dc95b5c5a106c8e1dad1297bc9ef70debf1", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064498952, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_view,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1724050800000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 1724322465459, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_table,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:b66112fa9691aa02354115b5cef8356390b524fa67c6b06e018c362ac8d0b31d" + } + ] + } + }, + "systemMetadata": { + 
"lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:b9c1cbdddc1018284bdfb113865f5dc95b5c5a106c8e1dad1297bc9ef70debf1", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "\n create or replace table smoke_test_db_2.table_from_other_db\n as (select * from smoke_test_db.base_table)\n ", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549574990, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549574990, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724064498946, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:2fa44cc9d306c7523477fad59ff43e2e580081ee770da69b9b9f66e119b4dcab", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "\n create or replace table `gcp-staging-2.smoke_test_db_4.derived_table_from_timetravelled_table` as (\n SELECT *\n FROM `gcp-staging.smoke_test_db.base_table`\n FOR SYSTEM_TIME AS OF TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 HOUR)\n );\n ", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549605511, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549605511, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724065178206, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:bb1e3c0fd1f6a26c2645cf4ba088a22ff346a9e323c6be451459ecfa7329a991", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": 
{ + "value": "\n create or replace view smoke_test_db.view_from_multiple_tables\n as \n (\n select a.date_utc, a.revenue, b.revenue as revenue2, c.revenue as revenue3 \n from \n smoke_test_db.base_table a\n left join \n smoke_test_db.lineage_from_base b \n on a.date_utc = b.date_utc \n left join \n smoke_test_db_2.table_from_other_db c \n on b.date_utc = c.date_utc \n );\n ", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549588112, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549588112, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724064498924, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:a049f27174fa88a7a7b1b7d5f60d2c353f3e9dd3d4994a8e35c91adb986eac4d", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "\n create or replace table `gcp-staging-2.smoke_test_db_4.sharded_table1_20230201` OPTIONS(description=\"Description of sharded table ending with _yyyyMMdd\") as (select * from `gcp-staging.smoke_test_db_2.table_from_other_db`);\n ", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549591093, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549591093, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724065178217, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:2fa44cc9d306c7523477fad59ff43e2e580081ee770da69b9b9f66e119b4dcab", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724065178207, + "runId": 
"bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:a049f27174fa88a7a7b1b7d5f60d2c353f3e9dd3d4994a8e35c91adb986eac4d", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724065178218, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:a049f27174fa88a7a7b1b7d5f60d2c353f3e9dd3d4994a8e35c91adb986eac4d", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db_2.table_from_other_db,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.sharded_table1,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178217, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.sharded_table1,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1724050800000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 1724322485618, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db_2.table_from_other_db,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:a049f27174fa88a7a7b1b7d5f60d2c353f3e9dd3d4994a8e35c91adb986eac4d" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + 
"entityUrn": "urn:li:query:2fa44cc9d306c7523477fad59ff43e2e580081ee770da69b9b9f66e119b4dcab", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_timetravelled_table,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178207, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:6684f16158660588d874f7ac46dbd7e56ad42acfb95b8a3d1f01292de8dcb930", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "\n create or replace table `gcp-staging-2.smoke_test_db_4.derived_table_from_wildcard_table` as (select * from `gcp-staging-2.smoke_test_db_4.sharded_table1_*`);\n ", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549600590, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549600590, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724065178211, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:bb1e3c0fd1f6a26c2645cf4ba088a22ff346a9e323c6be451459ecfa7329a991", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD),date_utc)" + }, + { + "entity": 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD),revenue)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.lineage_from_base,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.lineage_from_base,PROD),revenue)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db_2.table_from_other_db,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db_2.table_from_other_db,PROD),revenue)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_multiple_tables,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_multiple_tables,PROD),date_utc)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_multiple_tables,PROD),revenue)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_multiple_tables,PROD),revenue2)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_multiple_tables,PROD),revenue3)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724064498924, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:6684f16158660588d874f7ac46dbd7e56ad42acfb95b8a3d1f01292de8dcb930", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724065178212, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + 
"entityType": "query", + "entityUrn": "urn:li:query:5efacf0ddad8fd852ba394b29e7a4654ea454915930fb8dd4882c6f294b95cf8", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "\n create or replace table `gcp-staging.smoke_test_db.table_from_another_project` as (select * from `gcp-staging-2.smoke_test_db_3.base_table_2`);\n ", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549583501, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549583501, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724065178230, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:6684f16158660588d874f7ac46dbd7e56ad42acfb95b8a3d1f01292de8dcb930", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.sharded_table1,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_wildcard_table,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178211, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:5efacf0ddad8fd852ba394b29e7a4654ea454915930fb8dd4882c6f294b95cf8", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724065178231, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:5efacf0ddad8fd852ba394b29e7a4654ea454915930fb8dd4882c6f294b95cf8", + "changeType": 
"UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_3.base_table_2,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_another_project,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178231, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:b9c1cbdddc1018284bdfb113865f5dc95b5c5a106c8e1dad1297bc9ef70debf1", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724064498946, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_wildcard_table,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1724050800000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 1724322502689, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.sharded_table1,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:6684f16158660588d874f7ac46dbd7e56ad42acfb95b8a3d1f01292de8dcb930" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:bb1e3c0fd1f6a26c2645cf4ba088a22ff346a9e323c6be451459ecfa7329a991", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": 
"urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724064498924, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_view_and_table,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1724050800000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 1724322472836, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:a4d62b3996203c5661d02d28c1908d209a56e9966cefc274600a76335bc75de0" + }, + { + "auditStamp": { + "time": 1724050800000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 1724322472836, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_table,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:a4d62b3996203c5661d02d28c1908d209a56e9966cefc274600a76335bc75de0" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD),date_utc)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_view_and_table,PROD),date_utc)" + ], + "confidenceScore": 0.2, + "query": "urn:li:query:a4d62b3996203c5661d02d28c1908d209a56e9966cefc274600a76335bc75de0" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_table,PROD),revenue)" + ], + 
"downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_view_and_table,PROD),revenue)" + ], + "confidenceScore": 0.2, + "query": "urn:li:query:a4d62b3996203c5661d02d28c1908d209a56e9966cefc274600a76335bc75de0" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:b9c1cbdddc1018284bdfb113865f5dc95b5c5a106c8e1dad1297bc9ef70debf1", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db_2.table_from_other_db,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724064498946, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_multiple_tables,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1724050800000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 1724322484293, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:bb1e3c0fd1f6a26c2645cf4ba088a22ff346a9e323c6be451459ecfa7329a991" + }, + { + "auditStamp": { + "time": 1724050800000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 1724322484293, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.lineage_from_base,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:bb1e3c0fd1f6a26c2645cf4ba088a22ff346a9e323c6be451459ecfa7329a991" + }, + { + "auditStamp": { + "time": 1724050800000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 1724322484293, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db_2.table_from_other_db,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:bb1e3c0fd1f6a26c2645cf4ba088a22ff346a9e323c6be451459ecfa7329a991" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD),date_utc)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_multiple_tables,PROD),date_utc)" + ], + "confidenceScore": 0.2, + "query": "urn:li:query:bb1e3c0fd1f6a26c2645cf4ba088a22ff346a9e323c6be451459ecfa7329a991" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD),revenue)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_multiple_tables,PROD),revenue)" + ], + "confidenceScore": 0.2, + "query": "urn:li:query:bb1e3c0fd1f6a26c2645cf4ba088a22ff346a9e323c6be451459ecfa7329a991" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.lineage_from_base,PROD),revenue)" + ], + "downstreamType": "FIELD", + "downstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_multiple_tables,PROD),revenue2)" + ], + "confidenceScore": 0.2, + "query": "urn:li:query:bb1e3c0fd1f6a26c2645cf4ba088a22ff346a9e323c6be451459ecfa7329a991" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db_2.table_from_other_db,PROD),revenue)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_multiple_tables,PROD),revenue3)" + ], + "confidenceScore": 0.2, + "query": "urn:li:query:bb1e3c0fd1f6a26c2645cf4ba088a22ff346a9e323c6be451459ecfa7329a991" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:005ef53d98fe9ce2d807a16f00695367e6923b11729f2fba0db3e694bd2fe9c9", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "\n insert into smoke_test_db.usage_test values\n (\"2022-05-01\", \"seven\", 7),\n (\"2022-05-02\", \"ten\", 10),\n (\"2022-06-01\", \"four\", 4)\n ", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549557813, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549557813, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724064498926, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:005ef53d98fe9ce2d807a16f00695367e6923b11729f2fba0db3e694bd2fe9c9", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": 
"urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.usage_test,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724064498927, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_snapshot_on_table,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1724050800000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 1724322471500, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.snapshot_from_table,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:dc49a3d580b4df6c8d24961c39a18b3569d8e58783fe9895324876da32d98d1e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:dc49a3d580b4df6c8d24961c39a18b3569d8e58783fe9895324876da32d98d1e", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064498953, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_another_project,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1724050800000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 1724322478955, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_3.base_table_2,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:5efacf0ddad8fd852ba394b29e7a4654ea454915930fb8dd4882c6f294b95cf8" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:005ef53d98fe9ce2d807a16f00695367e6923b11729f2fba0db3e694bd2fe9c9", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724064498927, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:dc49a3d580b4df6c8d24961c39a18b3569d8e58783fe9895324876da32d98d1e", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "\n create or replace view smoke_test_db.view_from_snapshot_on_table\n as (select * from smoke_test_db.snapshot_from_table)\n ", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549577574, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549577574, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724064498924, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_table,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1724050800000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 1724322462741, + "actor": 
"urn:li:corpuser:dh-bigquery-smoke-test" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:bb3d7f6685e1f71868d0821451e52bfcf1a3bdfeb34c739c0305386256c38f9b" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_view_on_table,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1724050800000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 1724322464098, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_table,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:630f9169072a11dfd8d08a44479f2466acdf2dc2b078b946a739db437b74ad1d" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:dc49a3d580b4df6c8d24961c39a18b3569d8e58783fe9895324876da32d98d1e", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724064498925, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:dc49a3d580b4df6c8d24961c39a18b3569d8e58783fe9895324876da32d98d1e", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": 
"urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.snapshot_from_table,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_snapshot_on_table,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724064498925, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db_2.table_from_other_db,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1724050800000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 1724322467835, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:b9c1cbdddc1018284bdfb113865f5dc95b5c5a106c8e1dad1297bc9ef70debf1" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.snapshot_from_table,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetUsageStatistics", + "aspect": { + "json": { + "timestampMillis": 1721520000000, + "eventGranularity": { + "unit": "DAY", + "multiple": 1 + }, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "uniqueUserCount": 1, + "totalSqlQueries": 1, + "topSqlQueries": [ + "\n create or replace view smoke_test_db.view_from_snapshot_on_table\n as (select * from smoke_test_db.snapshot_from_table)\n " + ], + "userCounts": [ + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "count": 1 + } + ], + "fieldCounts": [] + } + }, + "systemMetadata": { + 
"lastObserved": 1724064997260, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:bb3d7f6685e1f71868d0821451e52bfcf1a3bdfeb34c739c0305386256c38f9b", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "\n create or replace view smoke_test_db.view_from_table\n as (select * from smoke_test_db.base_table)\n ", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549571609, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549571609, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724064498930, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.lineage_from_base,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetUsageStatistics", + "aspect": { + "json": { + "timestampMillis": 1721520000000, + "eventGranularity": { + "unit": "DAY", + "multiple": 1 + }, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "uniqueUserCount": 2, + "totalSqlQueries": 3, + "topSqlQueries": [ + "select revenue, date_utc from gcp-staging.smoke_test_db.lineage_from_base", + "select revenue from gcp-staging.smoke_test_db.lineage_from_base", + "\n create or replace view smoke_test_db.view_from_multiple_tables\n as \n (\n select a.date_utc, a.revenue, b.revenue as revenue2, c.revenue as revenue3 \n from \n smoke_test_db.base_table a\n left join \n smoke_test_db.lineage_from_base b \n on a.date_utc = b.date_utc \n left join \n smoke_test_db_2.table_from_other_db c \n on b.date_utc = c.date_utc \n );\n " + ], + "userCounts": [ + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "count": 2 + }, + { + "user": 
"urn:li:corpuser:dh-bigquery-smoke-test-2", + "count": 1 + } + ], + "fieldCounts": [ + { + "fieldPath": "revenue", + "count": 3 + }, + { + "fieldPath": "date_utc", + "count": 1 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178262, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:bb3d7f6685e1f71868d0821451e52bfcf1a3bdfeb34c739c0305386256c38f9b", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_table,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724064498930, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetUsageStatistics", + "aspect": { + "json": { + "timestampMillis": 1724284800000, + "eventGranularity": { + "unit": "DAY", + "multiple": 1 + }, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "uniqueUserCount": 2, + "totalSqlQueries": 13, + "topSqlQueries": [ + "create or replace table smoke_test_db.lineage_from_base as (select * from smoke_test_db.base_table)", + "\n create or replace table smoke_test_db_2.table_from_other_db\n as (select * from smoke_test_db.base_table)\n ", + "\n create or replace table smoke_test_db.table_from_view_and_table\n as (select b.date_utc, v.revenue from smoke_test_db.base_table b, smoke_test_db.view_from_table v)\n ", + "\n create or replace table `gcp-staging-2.smoke_test_db_4.derived_table_from_timetravelled_table` as (\n SELECT *\n FROM 
`gcp-staging.smoke_test_db.base_table`\n FOR SYSTEM_TIME AS OF TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 HOUR)\n );\n ", + "select revenue, date_utc from gcp-staging.smoke_test_db.base_table", + "\n create or replace view smoke_test_db.view_from_table\n as (select * from smoke_test_db.base_table)\n ", + "create or replace table _smoke_test_db_tmp_tables.tmp_table as (select * from smoke_test_db.base_table)", + "select revenue from gcp-staging.smoke_test_db.base_table FOR SYSTEM_TIME AS OF TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 HOUR)", + "select revenue from gcp-staging.smoke_test_db.base_table", + "\n create or replace view smoke_test_db.view_from_multiple_tables\n as \n (\n select a.date_utc, a.revenue, b.revenue as revenue2, c.revenue as revenue3 \n from \n smoke_test_db.base_table a\n left join \n smoke_test_db.lineage_from_base b \n on a.date_utc = b.date_utc \n left join \n smoke_test_db_2.table_from_other_db c \n on b.date_utc = c.date_utc \n );\n ", + "create MATERIALIZED VIEW smoke_test_db.materialized_view_from_table as (select * from smoke_test_db.base_table where revenue>100)", + "CREATE TABLE `gcp-staging-2.smoke_test_db_4.destination_table_of_select_query` AS\n (\n SELECT * FROM `gcp-staging.smoke_test_db.base_table`\n )", + "select revenue, date_utc from gcp-staging.smoke_test_db.base_table FOR SYSTEM_TIME AS OF TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 HOUR)" + ], + "userCounts": [ + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "count": 11 + }, + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "count": 2 + } + ], + "fieldCounts": [ + { + "fieldPath": "revenue", + "count": 5 + }, + { + "fieldPath": "date_utc", + "count": 4 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db_2.table_from_other_db,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetUsageStatistics", + "aspect": { + "json": { + "timestampMillis": 1721520000000, + "eventGranularity": { + "unit": "DAY", + "multiple": 1 + }, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "uniqueUserCount": 2, + "totalSqlQueries": 5, + "topSqlQueries": [ + "\n create or replace table `gcp-staging-2.smoke_test_db_4.sharded_table1_20230101` OPTIONS(description=\"Description of sharded table ending with _yyyyMMdd\") as (select * from `gcp-staging.smoke_test_db_2.table_from_other_db`) ;\n ", + "select revenue from gcp-staging.smoke_test_db_2.table_from_other_db", + "\n create or replace table `gcp-staging-2.smoke_test_db_4.sharded_table1_20230201` OPTIONS(description=\"Description of sharded table ending with _yyyyMMdd\") as (select * from `gcp-staging.smoke_test_db_2.table_from_other_db`);\n ", + "\n create or replace view smoke_test_db.view_from_multiple_tables\n as \n (\n select a.date_utc, a.revenue, b.revenue as revenue2, c.revenue as revenue3 \n from \n smoke_test_db.base_table a\n left join \n smoke_test_db.lineage_from_base b \n on a.date_utc = b.date_utc \n left join \n smoke_test_db_2.table_from_other_db c \n on b.date_utc = c.date_utc \n );\n ", + "select revenue, date_utc from gcp-staging.smoke_test_db_2.table_from_other_db" + ], + "userCounts": [ + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "count": 4 + }, + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "count": 1 + } + ], + "fieldCounts": [ + { + "fieldPath": "revenue", + "count": 3 + }, + { + "fieldPath": "date_utc", + "count": 1 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178265, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_table,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetUsageStatistics", + "aspect": { + "json": { + "timestampMillis": 1721520000000, + "eventGranularity": { + "unit": "DAY", + "multiple": 1 + }, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "uniqueUserCount": 2, + "totalSqlQueries": 5, + "topSqlQueries": [ + "select revenue from gcp-staging.smoke_test_db.view_from_table", + "\n create or replace table smoke_test_db.table_from_view_and_table\n as (select b.date_utc, v.revenue from smoke_test_db.base_table b, smoke_test_db.view_from_table v)\n ", + "\n create or replace table smoke_test_db.table_from_view\n as (select * from smoke_test_db.view_from_table)\n ", + "\n create or replace view smoke_test_db.view_from_view_on_table\n as (select * from smoke_test_db.view_from_table)\n ", + "select revenue, date_utc from gcp-staging.smoke_test_db.view_from_table" + ], + "userCounts": [ + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "count": 4 + }, + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "count": 1 + } + ], + "fieldCounts": [ + { + "fieldPath": "revenue", + "count": 3 + }, + { + "fieldPath": "date_utc", + "count": 1 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178260, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:4f5fd82d4808115ef07900a543b7d6e3551899815d11a945870c607d2dbda56e", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "insert into smoke_test_db.partition_test values (\"2022-05-24\", 20), (\"2022-06-24\", 30)", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549560560, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549560560, + "actor": 
"urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724064498928, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:bb3d7f6685e1f71868d0821451e52bfcf1a3bdfeb34c739c0305386256c38f9b", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724064498930, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:4f5fd82d4808115ef07900a543b7d6e3551899815d11a945870c607d2dbda56e", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.partition_test,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724064498928, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:4f5fd82d4808115ef07900a543b7d6e3551899815d11a945870c607d2dbda56e", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724064498929, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:cb345cc231c81ec7b871d6727437a87b5bc18a95ecf37e857f07096254c2d2c1", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "create or replace table smoke_test_db.usage_test (date_utc date, key STRING, value INTEGER)", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549557257, + "actor": 
"urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549557257, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724064498927, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:cb345cc231c81ec7b871d6727437a87b5bc18a95ecf37e857f07096254c2d2c1", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.usage_test,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.usage_test,PROD),date_utc)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.usage_test,PROD),key)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.usage_test,PROD),value)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724064498927, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:e1769381f2d261efecb105f3ab6fc8a2fc6717a1509cc65ba125c03841b0923d", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064498953, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:cb345cc231c81ec7b871d6727437a87b5bc18a95ecf37e857f07096254c2d2c1", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724064498928, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724064997269, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "INSERT", + "customProperties": { + "query_urn": "urn:li:query:ebfe552aa0ddb538f3a6c4d444aff757e6df574f16a6dffc3ac146ce587fc491" + }, + "lastUpdatedTimestamp": 1721549563403 + } + }, + "systemMetadata": { + "lastObserved": 1724064997270, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:e1769381f2d261efecb105f3ab6fc8a2fc6717a1509cc65ba125c03841b0923d", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "create MATERIALIZED VIEW smoke_test_db.materialized_view_from_table as (select * from smoke_test_db.base_table where revenue>100)", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549581208, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549581208, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724064498938, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:e1769381f2d261efecb105f3ab6fc8a2fc6717a1509cc65ba125c03841b0923d", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724064498938, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", 
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_snapshot_on_table,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724064997268, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "CREATE", + "customProperties": { + "query_urn": "urn:li:query:dc49a3d580b4df6c8d24961c39a18b3569d8e58783fe9895324876da32d98d1e" + }, + "lastUpdatedTimestamp": 1721549577574 + } + }, + "systemMetadata": { + "lastObserved": 1724064997270, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724064997271, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "CREATE", + "customProperties": { + "query_urn": "urn:li:query:53d20616e4dd30dfc16ccc5771998f5ed93c9afa9b846104a19d072ba364fb5c" + }, + "lastUpdatedTimestamp": 1721549562792 + } + }, + "systemMetadata": { + "lastObserved": 1724064997273, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_view_and_table,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724064997267, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "CREATE", + "customProperties": { + "query_urn": 
"urn:li:query:a4d62b3996203c5661d02d28c1908d209a56e9966cefc274600a76335bc75de0" + }, + "lastUpdatedTimestamp": 1721549578210 + } + }, + "systemMetadata": { + "lastObserved": 1724064997269, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:e1769381f2d261efecb105f3ab6fc8a2fc6717a1509cc65ba125c03841b0923d", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.materialized_view_from_table,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724064498938, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:4bd08adca1e16a1e10673f736ae2c91e0ee68fd56b187bd507f360e429dbcb8c", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064498948, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:4bd08adca1e16a1e10673f736ae2c91e0ee68fd56b187bd507f360e429dbcb8c", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "\n create or replace table smoke_test_db.partition_test (date_utc date, revenue INTEGER) \n PARTITION BY date_utc\n ", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549559855, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549559855, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724064498929, + "runId": 
"bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.lineage_from_tmp_table,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetUsageStatistics", + "aspect": { + "json": { + "timestampMillis": 1721520000000, + "eventGranularity": { + "unit": "DAY", + "multiple": 1 + }, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "uniqueUserCount": 2, + "totalSqlQueries": 2, + "topSqlQueries": [ + "select revenue from gcp-staging.smoke_test_db.lineage_from_tmp_table", + "select revenue, date_utc from gcp-staging.smoke_test_db.lineage_from_tmp_table" + ], + "userCounts": [ + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "count": 1 + }, + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "count": 1 + } + ], + "fieldCounts": [ + { + "fieldPath": "revenue", + "count": 2 + }, + { + "fieldPath": "date_utc", + "count": 1 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178256, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:4bd08adca1e16a1e10673f736ae2c91e0ee68fd56b187bd507f360e429dbcb8c", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724064498930, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.partition_test,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetUsageStatistics", + "aspect": { + "json": { + "timestampMillis": 1724284800000, + "eventGranularity": { + "unit": "DAY", + "multiple": 1 + }, + "partitionSpec": { + "partition": 
"FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "uniqueUserCount": 2, + "totalSqlQueries": 15, + "topSqlQueries": [ + "\n select revenue from gcp-staging.smoke_test_db.partition_test t\n where cast(t.date_utc as DATE) < (select max(date_utc) from gcp-staging.smoke_test_db.partition_test)\n", + "\n select extract(month from date_utc) month, count(*) count, sum(revenue) gross from gcp-staging.smoke_test_db.partition_test\n group by month\n order by gross\n", + "\n select revenue from gcp-staging.smoke_test_db.partition_test t\n where cast(t.date_utc as DATE) = (select max(date_utc) from gcp-staging.smoke_test_db.partition_test)\n", + "select revenue, date_utc from gcp-staging.smoke_test_db.partition_test", + "select revenue from gcp-staging.smoke_test_db.partition_test" + ], + "userCounts": [ + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "count": 11 + }, + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "count": 4 + } + ], + "fieldCounts": [ + { + "fieldPath": "revenue", + "count": 15 + }, + { + "fieldPath": "date_utc", + "count": 6 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:4bd08adca1e16a1e10673f736ae2c91e0ee68fd56b187bd507f360e429dbcb8c", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.partition_test,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.partition_test,PROD),date_utc)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.partition_test,PROD),revenue)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724064498929, + "runId": "bigquery-queries-2024_08_19-16_18_14", + 
"lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db_2.table_from_other_db,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724064997280, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "CREATE", + "customProperties": { + "query_urn": "urn:li:query:b9c1cbdddc1018284bdfb113865f5dc95b5c5a106c8e1dad1297bc9ef70debf1" + }, + "lastUpdatedTimestamp": 1721549574990 + } + }, + "systemMetadata": { + "lastObserved": 1724064997281, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_multiple_tables,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724064997278, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "CREATE", + "customProperties": { + "query_urn": "urn:li:query:bb1e3c0fd1f6a26c2645cf4ba088a22ff346a9e323c6be451459ecfa7329a991" + }, + "lastUpdatedTimestamp": 1721549588112 + } + }, + "systemMetadata": { + "lastObserved": 1724064997280, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_view,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetUsageStatistics", + "aspect": { + "json": { + "timestampMillis": 1721520000000, + "eventGranularity": { + "unit": "DAY", + "multiple": 1 + }, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "uniqueUserCount": 
2, + "totalSqlQueries": 2, + "topSqlQueries": [ + "select revenue, date_utc from gcp-staging.smoke_test_db.table_from_view", + "select revenue from gcp-staging.smoke_test_db.table_from_view" + ], + "userCounts": [ + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "count": 1 + }, + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "count": 1 + } + ], + "fieldCounts": [ + { + "fieldPath": "revenue", + "count": 2 + }, + { + "fieldPath": "date_utc", + "count": 1 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178259, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.materialized_view_from_table,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetUsageStatistics", + "aspect": { + "json": { + "timestampMillis": 1721520000000, + "eventGranularity": { + "unit": "DAY", + "multiple": 1 + }, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "uniqueUserCount": 2, + "totalSqlQueries": 2, + "topSqlQueries": [ + "select revenue from gcp-staging.smoke_test_db.materialized_view_from_table", + "select revenue, date_utc from gcp-staging.smoke_test_db.materialized_view_from_table" + ], + "userCounts": [ + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "count": 1 + }, + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "count": 1 + } + ], + "fieldCounts": [ + { + "fieldPath": "revenue", + "count": 2 + }, + { + "fieldPath": "date_utc", + "count": 1 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178271, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_integer_range_partition,PROD)", + "changeType": "UPSERT", + "aspectName": 
"datasetUsageStatistics", + "aspect": { + "json": { + "timestampMillis": 1721520000000, + "eventGranularity": { + "unit": "DAY", + "multiple": 1 + }, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "uniqueUserCount": 1, + "totalSqlQueries": 1, + "topSqlQueries": [ + "\n SELECT\n customer_id,\n date1\n FROM\n `gcp-staging-2.smoke_test_db_4.table_with_integer_range_partition`\n WHERE\n customer_id=1\n" + ], + "userCounts": [ + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "count": 1 + } + ], + "fieldCounts": [ + { + "fieldPath": "customer_id", + "count": 1 + }, + { + "fieldPath": "date1", + "count": 1 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178264, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:2957e74d80b00aef4f3dc7b0b323d1fa863c78fd882d858186579c0737df00e2", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "create or replace table smoke_test_db.lineage_from_tmp_table as (select * from _smoke_test_db_tmp_tables.tmp_table)", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549567376, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549567376, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724064498932, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_table,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724064997276, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + 
"operationType": "CREATE", + "customProperties": { + "query_urn": "urn:li:query:bb3d7f6685e1f71868d0821451e52bfcf1a3bdfeb34c739c0305386256c38f9b" + }, + "lastUpdatedTimestamp": 1721549571609 + } + }, + "systemMetadata": { + "lastObserved": 1724064997278, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:2957e74d80b00aef4f3dc7b0b323d1fa863c78fd882d858186579c0737df00e2", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging._smoke_test_db_tmp_tables.tmp_table,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.lineage_from_tmp_table,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724064498933, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:2957e74d80b00aef4f3dc7b0b323d1fa863c78fd882d858186579c0737df00e2", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724064498933, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_ingestion_time_partition,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetUsageStatistics", + "aspect": { + "json": { + "timestampMillis": 1721520000000, + "eventGranularity": { + "unit": "DAY", + "multiple": 1 + }, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "uniqueUserCount": 1, + "totalSqlQueries": 1, + "topSqlQueries": [ + "\n select \n transaction_id \n from \n 
`gcp-staging-2.smoke_test_db_4.table_with_ingestion_time_partition` \n where \n _PARTITIONDATE = CURRENT_DATE()\n" + ], + "userCounts": [ + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "count": 1 + } + ], + "fieldCounts": [ + { + "fieldPath": "transaction_id", + "count": 1 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178275, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.lineage_from_base,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724064997286, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "CREATE", + "customProperties": { + "query_urn": "urn:li:query:8f7bb4efb71d494b2bfe115937d6022db0ab9e6ea3d293839a457480e75430e1" + }, + "lastUpdatedTimestamp": 1721549569547 + } + }, + "systemMetadata": { + "lastObserved": 1724064997288, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.external_table_us_states,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetUsageStatistics", + "aspect": { + "json": { + "timestampMillis": 1721520000000, + "eventGranularity": { + "unit": "DAY", + "multiple": 1 + }, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "uniqueUserCount": 1, + "totalSqlQueries": 2, + "topSqlQueries": [ + "\n select name, post_abbr from `gcp-staging-2.smoke_test_db_4.external_table_us_states`\n", + "\n create or replace table `gcp-staging-2.smoke_test_db_4.derived_table_from_external_table` as (select * from `gcp-staging-2.smoke_test_db_4.external_table_us_states`);\n " + ], + "userCounts": 
[ + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "count": 2 + } + ], + "fieldCounts": [ + { + "fieldPath": "name", + "count": 1 + }, + { + "fieldPath": "post_abbr", + "count": 1 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178259, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.partition_test,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724064997288, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "CREATE", + "customProperties": { + "query_urn": "urn:li:query:4bd08adca1e16a1e10673f736ae2c91e0ee68fd56b187bd507f360e429dbcb8c" + }, + "lastUpdatedTimestamp": 1721549559855 + } + }, + "systemMetadata": { + "lastObserved": 1724064997290, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.sharded_table1,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetUsageStatistics", + "aspect": { + "json": { + "timestampMillis": 1721520000000, + "eventGranularity": { + "unit": "DAY", + "multiple": 1 + }, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "uniqueUserCount": 2, + "totalSqlQueries": 8, + "topSqlQueries": [ + "select revenue from `gcp-staging-2.smoke_test_db_4.sharded_table1_*`", + "select revenue, date_utc from gcp-staging-2.smoke_test_db_4.sharded_table1_20230101", + "\n create or replace table `gcp-staging-2.smoke_test_db_4.derived_table_from_sharded_table` as (select * from `gcp-staging-2.smoke_test_db_4.sharded_table1_20230101`);\n ", + "\n create or replace table 
`gcp-staging-2.smoke_test_db_4.derived_table_from_wildcard_table` as (select * from `gcp-staging-2.smoke_test_db_4.sharded_table1_*`);\n ", + "select revenue, date_utc from `gcp-staging-2.smoke_test_db_4.sharded_table1_*`", + "select revenue from gcp-staging-2.smoke_test_db_4.sharded_table1_20230101", + "select revenue, date_utc from gcp-staging-2.smoke_test_db_4.sharded_table1_20230201", + "select revenue from gcp-staging-2.smoke_test_db_4.sharded_table1_20230201" + ], + "userCounts": [ + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "count": 5 + }, + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "count": 3 + } + ], + "fieldCounts": [ + { + "fieldPath": "revenue", + "count": 6 + }, + { + "fieldPath": "date_utc", + "count": 3 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178267, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_another_project,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetUsageStatistics", + "aspect": { + "json": { + "timestampMillis": 1721520000000, + "eventGranularity": { + "unit": "DAY", + "multiple": 1 + }, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "uniqueUserCount": 2, + "totalSqlQueries": 2, + "topSqlQueries": [ + "select revenue, date_utc from gcp-staging.smoke_test_db.table_from_another_project", + "select revenue from gcp-staging.smoke_test_db.table_from_another_project" + ], + "userCounts": [ + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "count": 1 + }, + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "count": 1 + } + ], + "fieldCounts": [ + { + "fieldPath": "revenue", + "count": 2 + }, + { + "fieldPath": "date_utc", + "count": 1 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178269, + "runId": 
"bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.partition_test,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724064997291, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "INSERT", + "customProperties": { + "query_urn": "urn:li:query:4f5fd82d4808115ef07900a543b7d6e3551899815d11a945870c607d2dbda56e" + }, + "lastUpdatedTimestamp": 1721549560560 + } + }, + "systemMetadata": { + "lastObserved": 1724064997293, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_3.base_table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetUsageStatistics", + "aspect": { + "json": { + "timestampMillis": 1721520000000, + "eventGranularity": { + "unit": "DAY", + "multiple": 1 + }, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "uniqueUserCount": 2, + "totalSqlQueries": 4, + "topSqlQueries": [ + "select revenue from gcp-staging-2.smoke_test_db_3.base_table_2", + "\n create or replace table `gcp-staging.smoke_test_db.table_from_another_project` as (select * from `gcp-staging-2.smoke_test_db_3.base_table_2`);\n ", + "select revenue, date_utc from gcp-staging-2.smoke_test_db_3.base_table_2", + "\n create or replace table `gcp-staging-2.smoke_test_db_3.derived_table` as (select * from `gcp-staging-2.smoke_test_db_3.base_table_2`);\n " + ], + "userCounts": [ + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "count": 3 + }, + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "count": 1 + } + ], + "fieldCounts": [ + { + "fieldPath": "revenue", + 
"count": 2 + }, + { + "fieldPath": "date_utc", + "count": 1 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178263, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.destination_table_of_select_query,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724050800000, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "CREATE", + "customProperties": { + "query_urn": "urn:li:query:44c19e1fccfa56779f6958f62d3476819b48af701bbee43ccecf5c9e04d63fc4" + }, + "lastUpdatedTimestamp": 1724322510656 + } + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_nested_fields,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetUsageStatistics", + "aspect": { + "json": { + "timestampMillis": 1721520000000, + "eventGranularity": { + "unit": "DAY", + "multiple": 1 + }, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "uniqueUserCount": 1, + "totalSqlQueries": 1, + "topSqlQueries": [ + "\n SELECT\n first_name,\n last_name,\n dob,\n addresses[offset(0)].address,\n addresses[offset(0)].city\n FROM \n gcp-staging-2.smoke_test_db_4.table_with_nested_fields\n" + ], + "userCounts": [ + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "count": 1 + } + ], + "fieldCounts": [ + { + "fieldPath": "addresses", + "count": 1 + }, + { + "fieldPath": "dob", + "count": 1 + }, + { + "fieldPath": "first_name", + "count": 1 + }, + { + "fieldPath": "last_name", + "count": 1 + } + ] + } + 
}, + "systemMetadata": { + "lastObserved": 1724065178276, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:53d20616e4dd30dfc16ccc5771998f5ed93c9afa9b846104a19d072ba364fb5c", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "create or replace table smoke_test_db.base_table (date_utc timestamp, revenue INTEGER)", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549562792, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549562792, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724064498936, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:53d20616e4dd30dfc16ccc5771998f5ed93c9afa9b846104a19d072ba364fb5c", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD),date_utc)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD),revenue)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724064498936, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.usage_test,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724064997294, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" 
+ }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "CREATE", + "customProperties": { + "query_urn": "urn:li:query:cb345cc231c81ec7b871d6727437a87b5bc18a95ecf37e857f07096254c2d2c1" + }, + "lastUpdatedTimestamp": 1721549557257 + } + }, + "systemMetadata": { + "lastObserved": 1724064997295, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.usage_test,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetUsageStatistics", + "aspect": { + "json": { + "timestampMillis": 1724284800000, + "eventGranularity": { + "unit": "DAY", + "multiple": 1 + }, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "uniqueUserCount": 2, + "totalSqlQueries": 11, + "topSqlQueries": [ + "select * from gcp-staging.smoke_test_db.usage_test", + "select value from gcp-staging.smoke_test_db.usage_test", + "select key from gcp-staging.smoke_test_db.usage_test", + "\n select extract(month from date_utc) month, count(*) count, sum(value) total from gcp-staging.smoke_test_db.usage_test\n group by month\n order by total\n " + ], + "userCounts": [ + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "count": 8 + }, + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "count": 3 + } + ], + "fieldCounts": [ + { + "fieldPath": "value", + "count": 4 + }, + { + "fieldPath": "key", + "count": 3 + }, + { + "fieldPath": "date_utc", + "count": 1 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:b66112fa9691aa02354115b5cef8356390b524fa67c6b06e018c362ac8d0b31d", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "\n create or replace table 
smoke_test_db.table_from_view\n as (select * from smoke_test_db.view_from_table)\n ", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549572814, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549572814, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724064498942, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:b66112fa9691aa02354115b5cef8356390b524fa67c6b06e018c362ac8d0b31d", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_table,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_view,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724064498942, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.usage_test,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724064997296, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "INSERT", + "customProperties": { + "query_urn": "urn:li:query:005ef53d98fe9ce2d807a16f00695367e6923b11729f2fba0db3e694bd2fe9c9" + }, + "lastUpdatedTimestamp": 1721549557813 + } + }, + "systemMetadata": { + "lastObserved": 1724064997298, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:53d20616e4dd30dfc16ccc5771998f5ed93c9afa9b846104a19d072ba364fb5c", + "changeType": 
"UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724064498937, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_view_on_table,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetUsageStatistics", + "aspect": { + "json": { + "timestampMillis": 1721520000000, + "eventGranularity": { + "unit": "DAY", + "multiple": 1 + }, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "uniqueUserCount": 2, + "totalSqlQueries": 2, + "topSqlQueries": [ + "select revenue from gcp-staging.smoke_test_db.view_from_view_on_table", + "select revenue, date_utc from gcp-staging.smoke_test_db.view_from_view_on_table" + ], + "userCounts": [ + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "count": 1 + }, + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "count": 1 + } + ], + "fieldCounts": [ + { + "fieldPath": "revenue", + "count": 2 + }, + { + "fieldPath": "date_utc", + "count": 1 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178272, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:b66112fa9691aa02354115b5cef8356390b524fa67c6b06e018c362ac8d0b31d", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724064498942, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:cb345cc231c81ec7b871d6727437a87b5bc18a95ecf37e857f07096254c2d2c1", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": 
{ + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064498953, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:ebfe552aa0ddb538f3a6c4d444aff757e6df574f16a6dffc3ac146ce587fc491", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064498953, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:ebfe552aa0ddb538f3a6c4d444aff757e6df574f16a6dffc3ac146ce587fc491", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "insert into smoke_test_db.base_table values (CURRENT_TIMESTAMP(), 100), (TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 HOUR), 110)", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549563403, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549563403, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724064498935, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:ebfe552aa0ddb538f3a6c4d444aff757e6df574f16a6dffc3ac146ce587fc491", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724064498936, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_multiple_tables,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetUsageStatistics", + "aspect": 
{ + "json": { + "timestampMillis": 1721520000000, + "eventGranularity": { + "unit": "DAY", + "multiple": 1 + }, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "uniqueUserCount": 2, + "totalSqlQueries": 2, + "topSqlQueries": [ + "select revenue, date_utc from gcp-staging.smoke_test_db.view_from_multiple_tables", + "select revenue from gcp-staging.smoke_test_db.view_from_multiple_tables" + ], + "userCounts": [ + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "count": 1 + }, + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "count": 1 + } + ], + "fieldCounts": [ + { + "fieldPath": "revenue", + "count": 2 + }, + { + "fieldPath": "date_utc", + "count": 1 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178268, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:ebfe552aa0ddb538f3a6c4d444aff757e6df574f16a6dffc3ac146ce587fc491", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724064498935, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.customer_demo.purchase_event,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetUsageStatistics", + "aspect": { + "json": { + "timestampMillis": 1724284800000, + "eventGranularity": { + "unit": "DAY", + "multiple": 1 + }, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "uniqueUserCount": 1, + "totalSqlQueries": 79, + "topSqlQueries": [ + "\n SELECT COUNT(*)\n FROM gcp-staging.customer_demo.purchase_event\n WHERE amount < 0\n ", + "SELECT AVG(amount) FROM 
gcp-staging.customer_demo.purchase_event", + "SELECT COUNT(*) FROM gcp-staging.customer_demo.purchase_event WHERE user_id IS NULL", + "\n SELECT COUNT(*)\n FROM gcp-staging.customer_demo.purchase_event\n WHERE quantity < 0\n ", + "SELECT\n (SUM(CASE WHEN amount BETWEEN 0 AND 10 THEN 1 ELSE 0 END) * 100.0) / COUNT(*) AS percentage_in_range\nFROM\n gcp-staging.customer_demo.purchase_event;" + ], + "userCounts": [ + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "count": 79 + } + ], + "fieldCounts": [ + { + "fieldPath": "amount", + "count": 24 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:a4d62b3996203c5661d02d28c1908d209a56e9966cefc274600a76335bc75de0", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064498951, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.lineage_from_tmp_table,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724064997307, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "CREATE", + "customProperties": { + "query_urn": "urn:li:query:2957e74d80b00aef4f3dc7b0b323d1fa863c78fd882d858186579c0737df00e2" + }, + "lastUpdatedTimestamp": 1721549567376 + } + }, + "systemMetadata": { + "lastObserved": 1724064997309, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:a4d62b3996203c5661d02d28c1908d209a56e9966cefc274600a76335bc75de0", + "changeType": 
"UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "\n create or replace table smoke_test_db.table_from_view_and_table\n as (select b.date_utc, v.revenue from smoke_test_db.base_table b, smoke_test_db.view_from_table v)\n ", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549578210, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549578210, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724064498947, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:a4d62b3996203c5661d02d28c1908d209a56e9966cefc274600a76335bc75de0", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724064498947, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.materialized_view_from_table,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724064997309, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "UNKNOWN", + "customProperties": { + "query_urn": "urn:li:query:a5b8ca0f0b97816db6ca440bdd7c6b11acc823fa70250396b902eb1d46835fbc" + }, + "lastUpdatedTimestamp": 1721549580688 + } + }, + "systemMetadata": { + "lastObserved": 1724064997311, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_view_and_table,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetUsageStatistics", + "aspect": { + "json": { + "timestampMillis": 1721520000000, + "eventGranularity": { + "unit": "DAY", + "multiple": 1 + }, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "uniqueUserCount": 2, + "totalSqlQueries": 2, + "topSqlQueries": [ + "select revenue from gcp-staging.smoke_test_db.table_from_view_and_table", + "select revenue, date_utc from gcp-staging.smoke_test_db.table_from_view_and_table" + ], + "userCounts": [ + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "count": 1 + }, + { + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "count": 1 + } + ], + "fieldCounts": [ + { + "fieldPath": "revenue", + "count": 2 + }, + { + "fieldPath": "date_utc", + "count": 1 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178273, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_view_on_table,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724064997306, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "CREATE", + "customProperties": { + "query_urn": "urn:li:query:630f9169072a11dfd8d08a44479f2466acdf2dc2b078b946a739db437b74ad1d" + }, + "lastUpdatedTimestamp": 1721549572202 + } + }, + "systemMetadata": { + "lastObserved": 1724064997308, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.materialized_view_from_table,PROD)", + "changeType": 
"UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724064997312, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "CREATE", + "customProperties": { + "query_urn": "urn:li:query:e1769381f2d261efecb105f3ab6fc8a2fc6717a1509cc65ba125c03841b0923d" + }, + "lastUpdatedTimestamp": 1721549581208 + } + }, + "systemMetadata": { + "lastObserved": 1724064997314, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_view,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724064997305, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "CREATE", + "customProperties": { + "query_urn": "urn:li:query:b66112fa9691aa02354115b5cef8356390b524fa67c6b06e018c362ac8d0b31d" + }, + "lastUpdatedTimestamp": 1721549572814 + } + }, + "systemMetadata": { + "lastObserved": 1724064997307, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:a4d62b3996203c5661d02d28c1908d209a56e9966cefc274600a76335bc75de0", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD),date_utc)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_table,PROD)" + }, + { + "entity": 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_table,PROD),revenue)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_view_and_table,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_view_and_table,PROD),date_utc)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_view_and_table,PROD),revenue)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724064498947, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.materialized_view_from_table,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064997316, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.lineage_from_tmp_table,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064997315, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:8f7bb4efb71d494b2bfe115937d6022db0ab9e6ea3d293839a457480e75430e1", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064498950, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.sharded_table1,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724065178292, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "CREATE", + "customProperties": { + "query_urn": "urn:li:query:ccc4a40bb7abb852c9a97a2dc80c0928447bf28b91be0e41a80f691ca8e34e35" + }, + "lastUpdatedTimestamp": 1721549588804 + } + }, + "systemMetadata": { + "lastObserved": 1724065178293, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_external_table,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724065178287, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "CREATE", + "customProperties": { + "query_urn": "urn:li:query:6e250c6966754a5e6532fbb444172dacf5813b0b7afceefbf7772a29878f48f8" + }, + "lastUpdatedTimestamp": 1721549602653 + } + }, + "systemMetadata": { + "lastObserved": 1724065178288, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_3.base_table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724065178288, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "CREATE", + "customProperties": { + "query_urn": 
"urn:li:query:f53151fa3d689ec865c53fa535479e3fc3cc5794be5daaa9a5a4f7f40a7c660f" + }, + "lastUpdatedTimestamp": 1721549582803 + } + }, + "systemMetadata": { + "lastObserved": 1724065178289, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:005ef53d98fe9ce2d807a16f00695367e6923b11729f2fba0db3e694bd2fe9c9", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064498947, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_integer_range_partition,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724065178299, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "CREATE", + "customProperties": { + "query_urn": "urn:li:query:6bbebdb71c5ef982b74fb6bbef58f7f33162566fd9b14385f2da971503317539" + }, + "lastUpdatedTimestamp": 1721549596824 + } + }, + "systemMetadata": { + "lastObserved": 1724065178300, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.sharded_table1,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724065178293, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "INSERT", + "customProperties": { + "query_urn": "urn:li:query:e4e43fe8e17490b4b360a704a5604dd8d55763afd40f10c335e340132a9fe178" + }, + 
"lastUpdatedTimestamp": 1721549593090 + } + }, + "systemMetadata": { + "lastObserved": 1724065178295, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:a5b8ca0f0b97816db6ca440bdd7c6b11acc823fa70250396b902eb1d46835fbc", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "drop MATERIALIZED VIEW if exists smoke_test_db.materialized_view_from_table", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549580688, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549580688, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724064498939, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:a5b8ca0f0b97816db6ca440bdd7c6b11acc823fa70250396b902eb1d46835fbc", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.materialized_view_from_table,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724064498939, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_another_project,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724065178298, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "CREATE", + "customProperties": { + "query_urn": "urn:li:query:5efacf0ddad8fd852ba394b29e7a4654ea454915930fb8dd4882c6f294b95cf8" + }, + 
"lastUpdatedTimestamp": 1721549583501 + } + }, + "systemMetadata": { + "lastObserved": 1724065178299, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_sharded_table,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724065178300, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "CREATE", + "customProperties": { + "query_urn": "urn:li:query:49b267d0cd050c6a45b4d26bcdc6d9ddceb51aa7ed29399c52ef967e8da2b58d" + }, + "lastUpdatedTimestamp": 1721549598252 + } + }, + "systemMetadata": { + "lastObserved": 1724065178302, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_ingestion_time_partition,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724065178307, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "CREATE", + "customProperties": { + "query_urn": "urn:li:query:420cad6350235517b34e6e376414d89be0734f87fba6789754be420051d4901c" + }, + "lastUpdatedTimestamp": 1721549596155 + } + }, + "systemMetadata": { + "lastObserved": 1724065178308, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:2d4a0fdd933c8f9009a21f00b3b0213b025d97ff5e6a39cd031e9d5e5c31f832", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "\n CREATE OR 
REPLACE TABLE `gcp-staging-2.smoke_test_db_4.table_with_nested_fields` (\n id STRING,\n first_name STRING OPTIONS(description = \"First name\"),\n last_name STRING OPTIONS(description = \"Last name\"),\n dob DATE OPTIONS(description = \"Date of birth\"),\n addresses\n ARRAY<\n STRUCT<\n status STRING,\n address STRING OPTIONS(description = \"Full Address\"),\n city STRING,\n state STRING,\n zip STRING,\n numberOfYears STRING>>\n ) OPTIONS (\n description = 'Example name and addresses table');\n ", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549597638, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549597638, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724065178307, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:8f7bb4efb71d494b2bfe115937d6022db0ab9e6ea3d293839a457480e75430e1", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "create or replace table smoke_test_db.lineage_from_base as (select * from smoke_test_db.base_table)", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549569547, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549569547, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724064498937, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:2d4a0fdd933c8f9009a21f00b3b0213b025d97ff5e6a39cd031e9d5e5c31f832", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724065178308, + "runId": 
"bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:2d4a0fdd933c8f9009a21f00b3b0213b025d97ff5e6a39cd031e9d5e5c31f832", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_nested_fields,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_nested_fields,PROD),id)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_nested_fields,PROD),first_name)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_nested_fields,PROD),last_name)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_nested_fields,PROD),dob)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_nested_fields,PROD),addresses)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178307, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.sharded_table1,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724065178296, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "CREATE", + "customProperties": { + "query_urn": "urn:li:query:a049f27174fa88a7a7b1b7d5f60d2c353f3e9dd3d4994a8e35c91adb986eac4d" + }, + "lastUpdatedTimestamp": 1721549591093 + } + }, + 
"systemMetadata": { + "lastObserved": 1724065178298, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_3.derived_table,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724065178302, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "CREATE", + "customProperties": { + "query_urn": "urn:li:query:88c4674d369ef49e881a5ea67ed3485e48f09b9a4924d5282c3ae25004737f95" + }, + "lastUpdatedTimestamp": 1721549585813 + } + }, + "systemMetadata": { + "lastObserved": 1724065178303, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_nested_fields,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724065178304, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "CREATE", + "customProperties": { + "query_urn": "urn:li:query:2d4a0fdd933c8f9009a21f00b3b0213b025d97ff5e6a39cd031e9d5e5c31f832" + }, + "lastUpdatedTimestamp": 1721549597638 + } + }, + "systemMetadata": { + "lastObserved": 1724065178306, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:e4e43fe8e17490b4b360a704a5604dd8d55763afd40f10c335e340132a9fe178", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "insert into `gcp-staging-2.smoke_test_db_4.sharded_table1_20230201` values (CURRENT_TIMESTAMP(), 
100)", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549593090, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549593090, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724065178296, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:a5b8ca0f0b97816db6ca440bdd7c6b11acc823fa70250396b902eb1d46835fbc", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724064498939, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:e4e43fe8e17490b4b360a704a5604dd8d55763afd40f10c335e340132a9fe178", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724065178297, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:e4e43fe8e17490b4b360a704a5604dd8d55763afd40f10c335e340132a9fe178", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.sharded_table1,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178297, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:8f7bb4efb71d494b2bfe115937d6022db0ab9e6ea3d293839a457480e75430e1", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": 
"urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.lineage_from_base,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724064498937, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_wildcard_table,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724065178284, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "CREATE", + "customProperties": { + "query_urn": "urn:li:query:6684f16158660588d874f7ac46dbd7e56ad42acfb95b8a3d1f01292de8dcb930" + }, + "lastUpdatedTimestamp": 1721549600590 + } + }, + "systemMetadata": { + "lastObserved": 1724065178285, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:8f7bb4efb71d494b2bfe115937d6022db0ab9e6ea3d293839a457480e75430e1", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724064498937, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.snapshot_from_table,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064997317, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_view,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064997317, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.lineage_from_base,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064997315, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_timetravelled_table,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1724065178323, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test", + "operationType": "CREATE", + "customProperties": { + "query_urn": "urn:li:query:2fa44cc9d306c7523477fad59ff43e2e580081ee770da69b9b9f66e119b4dcab" + }, + "lastUpdatedTimestamp": 1721549605511 + } + }, + "systemMetadata": { + "lastObserved": 1724065178324, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.partition_test,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064997316, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_view_on_table,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064997320, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:ccc4a40bb7abb852c9a97a2dc80c0928447bf28b91be0e41a80f691ca8e34e35", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "\n create or replace table `gcp-staging-2.smoke_test_db_4.sharded_table1_20230101` OPTIONS(description=\"Description of sharded table ending with _yyyyMMdd\") as (select * from `gcp-staging.smoke_test_db_2.table_from_other_db`) ;\n ", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549588804, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549588804, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724065178294, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.usage_test,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064997318, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:ccc4a40bb7abb852c9a97a2dc80c0928447bf28b91be0e41a80f691ca8e34e35", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724065178295, + "runId": 
"bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_timetravelled_table,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724065178326, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:ccc4a40bb7abb852c9a97a2dc80c0928447bf28b91be0e41a80f691ca8e34e35", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db_2.table_from_other_db,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.sharded_table1,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178294, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:2957e74d80b00aef4f3dc7b0b323d1fa863c78fd882d858186579c0737df00e2", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064498948, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.destination_table_of_select_query,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": 
"urn:li:query:f53151fa3d689ec865c53fa535479e3fc3cc5794be5daaa9a5a4f7f40a7c660f", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "\n create or replace table `gcp-staging-2.smoke_test_db_3.base_table_2` (date_utc timestamp, revenue INTEGER);\n ", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549582803, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549582803, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724065178290, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:a5b8ca0f0b97816db6ca440bdd7c6b11acc823fa70250396b902eb1d46835fbc", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064498951, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:f53151fa3d689ec865c53fa535479e3fc3cc5794be5daaa9a5a4f7f40a7c660f", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724065178291, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:f53151fa3d689ec865c53fa535479e3fc3cc5794be5daaa9a5a4f7f40a7c660f", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_3.base_table_2,PROD)" + }, + { + "entity": 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_3.base_table_2,PROD),date_utc)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_3.base_table_2,PROD),revenue)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178290, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:4f5fd82d4808115ef07900a543b7d6e3551899815d11a945870c607d2dbda56e", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064498948, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:53d20616e4dd30dfc16ccc5771998f5ed93c9afa9b846104a19d072ba364fb5c", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064498949, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064997314, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:bb1e3c0fd1f6a26c2645cf4ba088a22ff346a9e323c6be451459ecfa7329a991", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064498952, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", 
+ "entityUrn": "urn:li:query:bb3d7f6685e1f71868d0821451e52bfcf1a3bdfeb34c739c0305386256c38f9b", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064498952, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_ingestion_time_partition,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724065178327, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:b66112fa9691aa02354115b5cef8356390b524fa67c6b06e018c362ac8d0b31d", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064498952, + "runId": "bigquery-queries-2024_08_19-16_18_14", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_view_and_table,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064997317, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_multiple_tables,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064997318, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db_2.table_from_other_db,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064997320, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_snapshot_on_table,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064997319, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_table,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064997319, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:composite_0db44e02f671b69df68565346e9d2b68c7166fccf75bd494f34560bfa16c381b", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724064997332, + "runId": "bigquery-queries-2024_08_19-16_26_33", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:6bbebdb71c5ef982b74fb6bbef58f7f33162566fd9b14385f2da971503317539", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_integer_range_partition,PROD)" + }, + { + "entity": 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_integer_range_partition,PROD),customer_id)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_integer_range_partition,PROD),date1)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178301, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:6bbebdb71c5ef982b74fb6bbef58f7f33162566fd9b14385f2da971503317539", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "\n CREATE or REPLACE TABLE `gcp-staging-2.smoke_test_db_4.table_with_integer_range_partition` (customer_id INT64, date1 DATE)\n PARTITION BY\n RANGE_BUCKET(customer_id, GENERATE_ARRAY(0, 100, 10))\n OPTIONS (\n description = \"Description of Integer Range partitioned table\",\n require_partition_filter = TRUE);\n ", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549596824, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549596824, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724065178301, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:6bbebdb71c5ef982b74fb6bbef58f7f33162566fd9b14385f2da971503317539", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724065178339, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_sharded_table,PROD)", + 
"changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724065178325, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_3.derived_table,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724065178325, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:6bbebdb71c5ef982b74fb6bbef58f7f33162566fd9b14385f2da971503317539", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724065178301, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_nested_fields,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724065178328, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:44c19e1fccfa56779f6958f62d3476819b48af701bbee43ccecf5c9e04d63fc4", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:f53151fa3d689ec865c53fa535479e3fc3cc5794be5daaa9a5a4f7f40a7c660f", + 
"changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724065178346, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_integer_range_partition,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724065178328, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:5efacf0ddad8fd852ba394b29e7a4654ea454915930fb8dd4882c6f294b95cf8", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724065178338, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_wildcard_table,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724065178326, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:420cad6350235517b34e6e376414d89be0734f87fba6789754be420051d4901c", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_ingestion_time_partition,PROD)" + }, + { + "entity": 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_ingestion_time_partition,PROD),transaction_id)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1724065178310, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:420cad6350235517b34e6e376414d89be0734f87fba6789754be420051d4901c", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "\n CREATE or REPLACE TABLE\n `gcp-staging-2.smoke_test_db_4.table_with_ingestion_time_partition` (transaction_id INT64)\n PARTITION BY\n _PARTITIONDATE\n OPTIONS (\n description = \"Description of Ingestion time partitioned table\",\n partition_expiration_days = 3,\n require_partition_filter = TRUE);\n ", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1721549596155, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + }, + "lastModified": { + "time": 1721549596155, + "actor": "urn:li:corpuser:dh-bigquery-smoke-test" + } + } + }, + "systemMetadata": { + "lastObserved": 1724065178309, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:420cad6350235517b34e6e376414d89be0734f87fba6789754be420051d4901c", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724065178336, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:420cad6350235517b34e6e376414d89be0734f87fba6789754be420051d4901c", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1724065178310, + "runId": 
"bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.customer_demo.purchase_event,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724065178329, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:2fa44cc9d306c7523477fad59ff43e2e580081ee770da69b9b9f66e119b4dcab", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724065178336, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:ccc4a40bb7abb852c9a97a2dc80c0928447bf28b91be0e41a80f691ca8e34e35", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724065178344, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:6684f16158660588d874f7ac46dbd7e56ad42acfb95b8a3d1f01292de8dcb930", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724065178339, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.external_table_us_states,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724065178327, + "runId": "bigquery-queries-2024_08_19-16_29_19", + 
"lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:2d4a0fdd933c8f9009a21f00b3b0213b025d97ff5e6a39cd031e9d5e5c31f832", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724065178335, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:88c4674d369ef49e881a5ea67ed3485e48f09b9a4924d5282c3ae25004737f95", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724065178340, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.sharded_table1,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724065178327, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_external_table,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724065178325, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_3.base_table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724065178324, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:49b267d0cd050c6a45b4d26bcdc6d9ddceb51aa7ed29399c52ef967e8da2b58d", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724065178337, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:e4e43fe8e17490b4b360a704a5604dd8d55763afd40f10c335e340132a9fe178", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724065178345, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_another_project,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724065178331, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:a049f27174fa88a7a7b1b7d5f60d2c353f3e9dd3d4994a8e35c91adb986eac4d", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724065178341, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:6e250c6966754a5e6532fbb444172dacf5813b0b7afceefbf7772a29878f48f8", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1724065178339, + "runId": "bigquery-queries-2024_08_19-16_29_19", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git 
a/metadata-ingestion/tests/integration/bigquery_v2/query_log.json b/metadata-ingestion/tests/integration/bigquery_v2/query_log.json new file mode 100644 index 00000000000000..23513e46eb5fec --- /dev/null +++ b/metadata-ingestion/tests/integration/bigquery_v2/query_log.json @@ -0,0 +1,5404 @@ +[ + { + "query": "create or replace table smoke_test_db.usage_test (date_utc date, key STRING, value INTEGER)", + "session_id": null, + "timestamp": "2024-08-22 10:27:23.887000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "714133dca6aefca9787e41860cac29c12fcf745ebc2022515aafa4a1ac275562", + "usage_multiplier": 1, + "extra_info": { + "job_id": "15e12d10-ecd2-49df-9265-15e1d13063f2", + "statement_type": "CREATE_TABLE", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "usage_test" + }, + "referenced_tables": [] + } + }, + { + "query": "\n insert into smoke_test_db.usage_test values\n (\"2022-05-01\", \"seven\", 7),\n (\"2022-05-02\", \"ten\", 10),\n (\"2022-06-01\", \"four\", 4)\n ", + "session_id": null, + "timestamp": "2024-08-22 10:27:25.357000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "9a0a288a8fb978ca88c0bccb466e099eef0d9a18a2dd4c8b184319820047dd17", + "usage_multiplier": 1, + "extra_info": { + "job_id": "74e32b90-4099-463d-94c8-d7dee843cfa4", + "statement_type": "INSERT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "usage_test" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "usage_test" + } + ] + } + }, + { + "query": "\n create or replace table smoke_test_db.partition_test (date_utc date, revenue INTEGER) \n PARTITION BY date_utc\n ", + "session_id": null, + "timestamp": "2024-08-22 10:27:27.620000+00:00", + 
"user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "e5a917a8c68b133b30f7acf5c1a4d843cf00132d3785bf0320fba2ad579c519e", + "usage_multiplier": 1, + "extra_info": { + "job_id": "194dff44-329f-49e6-82f0-865c06c223ba", + "statement_type": "CREATE_TABLE", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "partition_test" + }, + "referenced_tables": [] + } + }, + { + "query": "insert into smoke_test_db.partition_test values (\"2022-05-24\", 20), (\"2022-06-24\", 30)", + "session_id": null, + "timestamp": "2024-08-22 10:27:28.864000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "1442aa8170b7c0a8ae76695175a8a34c06c602aa116656ddd29746f348478c95", + "usage_multiplier": 1, + "extra_info": { + "job_id": "56e06ba7-a054-4100-a0e3-d7d5c7a32d7e", + "statement_type": "INSERT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "partition_test" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "partition_test" + } + ] + } + }, + { + "query": "create or replace table smoke_test_db.base_table (date_utc timestamp, revenue INTEGER)", + "session_id": null, + "timestamp": "2024-08-22 10:27:31.326000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "48c41f9be14b4816933e27350f75d291a87194168b202c835d3fdb0e82e8cc1a", + "usage_multiplier": 1, + "extra_info": { + "job_id": "d7ef294f-8776-4d12-adc8-dd9de5acda16", + "statement_type": "CREATE_TABLE", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table" + }, + "referenced_tables": [] + } + }, + { + "query": "insert into smoke_test_db.base_table values 
(CURRENT_TIMESTAMP(), 100), (TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 HOUR), 110)", + "session_id": null, + "timestamp": "2024-08-22 10:27:32.660000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "73b3a547e874aa113a5623257fd0b174217521b34d1d8189e52bbb67bf38b494", + "usage_multiplier": 1, + "extra_info": { + "job_id": "027cb80d-d2f8-4ef7-931b-c2ba70d0acb1", + "statement_type": "INSERT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table" + } + ] + } + }, + { + "query": "create or replace table _smoke_test_db_tmp_tables.tmp_table as (select * from smoke_test_db.base_table)", + "session_id": null, + "timestamp": "2024-08-22 10:27:35.205000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "ee37a62a711cf6cb094c22761f49fa81f62e9a623d6425274a549be1425e3d59", + "usage_multiplier": 1, + "extra_info": { + "job_id": "47449db3-409f-4518-95fb-bbe986ff82cc", + "statement_type": "CREATE_TABLE_AS_SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_smoke_test_db_tmp_tables", + "table_id": "tmp_table" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table" + } + ] + } + }, + { + "query": "create or replace table smoke_test_db.lineage_from_tmp_table as (select * from _smoke_test_db_tmp_tables.tmp_table)", + "session_id": null, + "timestamp": "2024-08-22 10:27:37.731000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "3a15c1815163f7fc6696fbb4e8e2725246d7ac93a876bf4743a10b22775d3188", + "usage_multiplier": 1, + "extra_info": { + 
"job_id": "b583ade6-98f9-4323-909e-4b16678f7aaf", + "statement_type": "CREATE_TABLE_AS_SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "lineage_from_tmp_table" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "_smoke_test_db_tmp_tables", + "table_id": "tmp_table" + } + ] + } + }, + { + "query": "\n SELECT last_modified_time\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id=\"abc2023\"\n AND last_modified_time >= 1724236058040\n AND last_modified_time <= 1724322458040\n LIMIT 5\n ;", + "session_id": null, + "timestamp": "2024-08-22 10:27:38.597000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "2c44fb03f02e7c6777499bbd1a0e15b075d39b85c2bb544223f01a9e5ce4656d", + "usage_multiplier": 1, + "extra_info": { + "job_id": "953e6ea0-99b4-43dc-b27a-5eefa8cfb1ca", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon77d4fd91_539c_4f4e_9fee_b78ad8c277ea" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "\n SELECT COUNT(*)\n FROM gcp-staging.customer_demo.purchase_event\n WHERE amount < 0\n ", + "session_id": null, + "timestamp": "2024-08-22 10:27:39.259000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "92088ca0-f8a7-416b-a6c9-dbfc734d59a6", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon60e011141b8e3c85fb386e55d995a8290fc3a0896c26ac6d5717f14f5f163947" + }, + 
"referenced_tables": [] + } + }, + { + "query": "SELECT AVG(amount) FROM gcp-staging.customer_demo.purchase_event", + "session_id": null, + "timestamp": "2024-08-22 10:27:39.633000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "ef29f16d-9db8-47fc-bc3a-fdc11fdbd138", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon4db3da963edf09d1d3e6b8dddc9f22960bb56016490ef27620acc390fcf18a27" + }, + "referenced_tables": [] + } + }, + { + "query": "SELECT COUNT(*) \nFROM gcp-staging.customer_demo.purchase_event \nWHERE amount < 0", + "session_id": null, + "timestamp": "2024-08-22 10:27:40.178000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "c572ae0c-c77d-4f72-9284-be1b1fa8d401", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon84323ce6f50f99cfc5d2e8ad640f3923743cb41a6a759f18a7fce13c7a358d28" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT COUNT(*)\n FROM gcp-staging.customer_demo.purchase_event\n WHERE amount < 0\n ", + "session_id": null, + "timestamp": "2024-08-22 10:27:40.256000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "0d12200c-aefe-42cf-92b9-c3e728cc5e2c", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": 
"anon60e011141b8e3c85fb386e55d995a8290fc3a0896c26ac6d5717f14f5f163947" + }, + "referenced_tables": [] + } + }, + { + "query": "create or replace table smoke_test_db.lineage_from_base as (select * from smoke_test_db.base_table)", + "session_id": null, + "timestamp": "2024-08-22 10:27:40.257000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "ea4879ec408f01bb2e055212f9e42e239a2ab9414230d4e471a41684924f9408", + "usage_multiplier": 1, + "extra_info": { + "job_id": "9ac780d2-ce9a-487e-b5b0-fa50c534600f", + "statement_type": "CREATE_TABLE_AS_SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "lineage_from_base" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table" + } + ] + } + }, + { + "query": "\n SELECT last_modified_time\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id=\"abc2023\"\n AND last_modified_time >= 1724236060301\n AND last_modified_time <= 1724322460301\n LIMIT 5\n ;", + "session_id": null, + "timestamp": "2024-08-22 10:27:40.824000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "2c44fb03f02e7c6777499bbd1a0e15b075d39b85c2bb544223f01a9e5ce4656d", + "usage_multiplier": 1, + "extra_info": { + "job_id": "60d848ac-1fcd-4def-a81b-c08b0166b5c5", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon5af5d332_c8eb_4525_a078_3c31668e4a6b" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "SELECT COUNT(*) FROM gcp-staging.customer_demo.purchase_event WHERE user_id IS NULL", + "session_id": null, + 
"timestamp": "2024-08-22 10:27:41.442000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "9eb7efff-67d8-48e4-a4dd-73e7a72e6ce5", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonb22d0d582afd566e08144b196aec36622576fdf25b6e74a35e044a5b720ce191" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT row_count\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id='abc';", + "session_id": null, + "timestamp": "2024-08-22 10:27:41.535000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "81bcba1019e933b2de506faf0f6549b8602519495edc855c0a9267b8d5bb976c", + "usage_multiplier": 1, + "extra_info": { + "job_id": "78284ace-bd28-4a53-a476-cfc86db51919", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonbafa243d_7625_4485_a27d_6ca94e711867" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "SELECT COUNT(*) \nFROM gcp-staging.customer_demo.purchase_event \nWHERE amount < 0", + "session_id": null, + "timestamp": "2024-08-22 10:27:42.038000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "29cb5356-e155-4f88-b79b-d9d7bb2ff47f", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": 
"anon84323ce6f50f99cfc5d2e8ad640f3923743cb41a6a759f18a7fce13c7a358d28" + }, + "referenced_tables": [] + } + }, + { + "query": "SELECT AVG(amount) FROM gcp-staging.customer_demo.purchase_event", + "session_id": null, + "timestamp": "2024-08-22 10:27:42.191000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "bf6998d0-0984-4d09-80c2-d6167f0bd5be", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon4db3da963edf09d1d3e6b8dddc9f22960bb56016490ef27620acc390fcf18a27" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT row_count\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id='abc';", + "session_id": null, + "timestamp": "2024-08-22 10:27:42.281000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "81bcba1019e933b2de506faf0f6549b8602519495edc855c0a9267b8d5bb976c", + "usage_multiplier": 1, + "extra_info": { + "job_id": "4274ea3c-97f4-46eb-95f7-57f801b9643f", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anona67fc478_bd01_4230_9fbd_ec017a318330" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "\n create or replace view smoke_test_db.view_from_table\n as (select * from smoke_test_db.base_table)\n ", + "session_id": null, + "timestamp": "2024-08-22 10:27:42.741000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": 
"bf437f8369258ddb78cc9d77370a07344d0910bfa7316c8d5d3237d8134021aa", + "usage_multiplier": 1, + "extra_info": { + "job_id": "3dee2d9b-8a0a-4f85-a953-19bb77d6e5bf", + "statement_type": "CREATE_VIEW", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "view_from_table" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table" + } + ] + } + }, + { + "query": "SELECT AVG(amount) FROM gcp-staging.customer_demo.purchase_event", + "session_id": null, + "timestamp": "2024-08-22 10:27:43.087000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "ba08fe4a-a50c-4485-a00a-37b6eba7e98a", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon4db3da963edf09d1d3e6b8dddc9f22960bb56016490ef27620acc390fcf18a27" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT COUNT(*)\n FROM gcp-staging.customer_demo.purchase_event\n WHERE quantity < 0\n ", + "session_id": null, + "timestamp": "2024-08-22 10:27:43.178000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "3bba8392-5644-4565-a283-01b7ef199138", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon32e6bd0458339a10581928d131db191eaea3e29936a3454e7946ed8a12f17e38" + }, + "referenced_tables": [] + } + }, + { + "query": "SELECT COUNT(*) \nFROM gcp-staging.customer_demo.purchase_event \nWHERE amount < 0", + "session_id": null, + "timestamp": "2024-08-22 10:27:43.300000+00:00", + "user": 
"urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "b88614c8-d56f-42fd-8d33-0bbdf082e396", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon84323ce6f50f99cfc5d2e8ad640f3923743cb41a6a759f18a7fce13c7a358d28" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT last_modified_time\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id=\"abc\"\n AND last_modified_time >= 1724322061794\n AND last_modified_time <= 1724322463023\n LIMIT 5\n ;", + "session_id": null, + "timestamp": "2024-08-22 10:27:43.523000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "2c44fb03f02e7c6777499bbd1a0e15b075d39b85c2bb544223f01a9e5ce4656d", + "usage_multiplier": 1, + "extra_info": { + "job_id": "cebf6f8d-bb13-41ef-816d-9292eac62cc5", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anondf074cd5_822f_45ff_aa5a_cb4d4d69b71b" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "\n create or replace view smoke_test_db.view_from_view_on_table\n as (select * from smoke_test_db.view_from_table)\n ", + "session_id": null, + "timestamp": "2024-08-22 10:27:44.098000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "a3ad3488cc0978b7fad948192fdde83bf669a9fd49ab3c296181e115cbdba713", + "usage_multiplier": 1, + "extra_info": { + "job_id": "d398a69a-f4ba-4e81-b591-871152fb9a03", + "statement_type": "CREATE_VIEW", + 
"destination_table": { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "view_from_view_on_table" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table" + } + ] + } + }, + { + "query": "SELECT AVG(amount) FROM gcp-staging.customer_demo.purchase_event", + "session_id": null, + "timestamp": "2024-08-22 10:27:44.456000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "5bbb3cfd-a62c-4a36-b15f-a207d89e2c62", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon4db3da963edf09d1d3e6b8dddc9f22960bb56016490ef27620acc390fcf18a27" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT COUNT(*)\n FROM gcp-staging.customer_demo.purchase_event\n WHERE amount < 0\n ", + "session_id": null, + "timestamp": "2024-08-22 10:27:44.791000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "55dabfc7-a89e-47a6-a065-965c6dafb47e", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon60e011141b8e3c85fb386e55d995a8290fc3a0896c26ac6d5717f14f5f163947" + }, + "referenced_tables": [] + } + }, + { + "query": "\n create or replace table smoke_test_db.table_from_view\n as (select * from smoke_test_db.view_from_table)\n ", + "session_id": null, + "timestamp": "2024-08-22 10:27:45.459000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": 
"047ae44f589ae98693f8b355b284d052dcaac235ff30bdf8df5b89316ab1efd0", + "usage_multiplier": 1, + "extra_info": { + "job_id": "0b71e40e-fe16-4f70-8173-db68f89787e3", + "statement_type": "CREATE_TABLE_AS_SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "table_from_view" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table" + } + ] + } + }, + { + "query": "SELECT COUNT(*) \nFROM gcp-staging.customer_demo.purchase_event \nWHERE amount < 0", + "session_id": null, + "timestamp": "2024-08-22 10:27:45.555000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "dcf62ed3-098b-4e8f-afd0-62297db76b05", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon84323ce6f50f99cfc5d2e8ad640f3923743cb41a6a759f18a7fce13c7a358d28" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT row_count\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id='abc';", + "session_id": null, + "timestamp": "2024-08-22 10:27:46.821000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "81bcba1019e933b2de506faf0f6549b8602519495edc855c0a9267b8d5bb976c", + "usage_multiplier": 1, + "extra_info": { + "job_id": "56ee02b5-9e75-4acd-be96-19aee59af431", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon14ea5eb6_003f_40a7_ad6f_44dd1bad7026" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { 
+ "query": "\n create or replace table smoke_test_db_2.table_from_other_db\n as (select * from smoke_test_db.base_table)\n ", + "session_id": null, + "timestamp": "2024-08-22 10:27:47.835000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "2c4b56402d62a63135c5f0e81e4f6391670485bd46d355fdb0a4ce31884a170c", + "usage_multiplier": 1, + "extra_info": { + "job_id": "ea311635-bb0a-4216-a68d-81bc87ec6e89", + "statement_type": "CREATE_TABLE_AS_SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db_2", + "table_id": "table_from_other_db" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table" + } + ] + } + }, + { + "query": "\n create snapshot table if not exists smoke_test_db.snapshot_from_table\n clone smoke_test_db.base_table\n ", + "session_id": null, + "timestamp": "2024-08-22 10:27:50.147000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "27ac78089e13c8ae1c71126ad4036cbba12a0656fe1453f23d55c49202a70a06", + "usage_multiplier": 1, + "extra_info": { + "job_id": "bd00bc76-81b9-4a1c-8bc9-f28e333cdae4", + "statement_type": "CREATE_SNAPSHOT_TABLE", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "snapshot_from_table" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table" + } + ] + } + }, + { + "query": "\n create or replace view smoke_test_db.view_from_snapshot_on_table\n as (select * from smoke_test_db.snapshot_from_table)\n ", + "session_id": null, + "timestamp": "2024-08-22 10:27:51.500000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": 
"d61201ebb9413c74be7a18d33ef712d7a5488bca765977cb311513faff92e4d7", + "usage_multiplier": 1, + "extra_info": { + "job_id": "12ed5ed0-38c6-428c-99d0-d230b140dd1f", + "statement_type": "CREATE_VIEW", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "view_from_snapshot_on_table" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "snapshot_from_table" + } + ] + } + }, + { + "query": "\n create or replace table smoke_test_db.table_from_view_and_table\n as (select b.date_utc, v.revenue from smoke_test_db.base_table b, smoke_test_db.view_from_table v)\n ", + "session_id": null, + "timestamp": "2024-08-22 10:27:52.836000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "dd07587f59ed3d65b092bfb66fe8079129f780a0d9c8b530d1ea8ae868ef7c3e", + "usage_multiplier": 1, + "extra_info": { + "job_id": "955a444b-e9ef-44e4-9cc6-2a93e16a8eb0", + "statement_type": "CREATE_TABLE_AS_SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "table_from_view_and_table" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table" + }, + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table" + } + ] + } + }, + { + "query": "drop MATERIALIZED VIEW if exists smoke_test_db.materialized_view_from_table", + "session_id": null, + "timestamp": "2024-08-22 10:27:55.572000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "246765bb1a63ff4a6550373a2dc1f31070f1247be8db662f42a6fcf27073e9da", + "usage_multiplier": 1, + "extra_info": { + "job_id": "script_job_7ca68f4489c9b94df86a90933cfa7886_0", + "statement_type": "DROP_MATERIALIZED_VIEW", + "destination_table": { + 
"project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "materialized_view_from_table" + }, + "referenced_tables": [] + } + }, + { + "query": "create MATERIALIZED VIEW smoke_test_db.materialized_view_from_table as (select * from smoke_test_db.base_table where revenue>100)", + "session_id": null, + "timestamp": "2024-08-22 10:27:56.091000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "2184be1599060a6740d129b60172096d22107b42fc1634bf052db04b4d4f259b", + "usage_multiplier": 1, + "extra_info": { + "job_id": "script_job_034cd994c0ae9fb720f91dabfde3149b_1", + "statement_type": "CREATE_MATERIALIZED_VIEW", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "materialized_view_from_table" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table" + } + ] + } + }, + { + "query": "\n create or replace table `gcp-staging-2.smoke_test_db_3.base_table_2` (date_utc timestamp, revenue INTEGER);\n ", + "session_id": null, + "timestamp": "2024-08-22 10:27:57.705000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "0e636e8703fba1cb8b4bef2404e090c291e848a02324043c086a918264620640", + "usage_multiplier": 1, + "extra_info": { + "job_id": "dd349313-36b1-4607-914c-94df425773a2", + "statement_type": "CREATE_TABLE", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_3", + "table_id": "base_table_2" + }, + "referenced_tables": [] + } + }, + { + "query": "\n create or replace table `gcp-staging.smoke_test_db.table_from_another_project` as (select * from `gcp-staging-2.smoke_test_db_3.base_table_2`);\n ", + "session_id": null, + "timestamp": "2024-08-22 10:27:58.955000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + 
"default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "30362bb4bca3c47a23d588a26faddc8b276bb70e9f38685b4d91bffe0ad955d4", + "usage_multiplier": 1, + "extra_info": { + "job_id": "f4971ee8-f969-4d7c-8744-7b4ce66c29b4", + "statement_type": "CREATE_TABLE_AS_SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "table_from_another_project" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_3", + "table_id": "base_table_2" + } + ] + } + }, + { + "query": "\n create or replace table `gcp-staging-2.smoke_test_db_3.derived_table` as (select * from `gcp-staging-2.smoke_test_db_3.base_table_2`);\n ", + "session_id": null, + "timestamp": "2024-08-22 10:28:01.569000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "fd4278ccffdd4f56c411be3c09ed78d09776f8985d2b450d1e724dca39869d33", + "usage_multiplier": 1, + "extra_info": { + "job_id": "5a335e2b-0a28-4ac6-9138-c103ad3193bc", + "statement_type": "CREATE_TABLE_AS_SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_3", + "table_id": "derived_table" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_3", + "table_id": "base_table_2" + } + ] + } + }, + { + "query": "\n create or replace view smoke_test_db.view_from_multiple_tables\n as \n (\n select a.date_utc, a.revenue, b.revenue as revenue2, c.revenue as revenue3 \n from \n smoke_test_db.base_table a\n left join \n smoke_test_db.lineage_from_base b \n on a.date_utc = b.date_utc \n left join \n smoke_test_db_2.table_from_other_db c \n on b.date_utc = c.date_utc \n );\n ", + "session_id": null, + "timestamp": "2024-08-22 10:28:04.293000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": 
"fe631cb81007f8ae1cb9cc51e80a57cba7d21f876ebd8c3791708468d0497496", + "usage_multiplier": 1, + "extra_info": { + "job_id": "8fa83df5-e0ca-449e-a11e-4f5789e67e8e", + "statement_type": "CREATE_VIEW", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "view_from_multiple_tables" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table" + }, + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table" + }, + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "materialized_view_from_table" + }, + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "lineage_from_base" + }, + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db_2", + "table_id": "table_from_other_db" + } + ] + } + }, + { + "query": "\n create or replace table `gcp-staging-2.smoke_test_db_4.sharded_table1_20230101` OPTIONS(description=\"Description of sharded table ending with _yyyyMMdd\") as (select * from `gcp-staging.smoke_test_db_2.table_from_other_db`) ;\n ", + "session_id": null, + "timestamp": "2024-08-22 10:28:05.618000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "04834ed24134c3a61000b5df49c6b4a99db596b9f7535894922a06d050d4d084", + "usage_multiplier": 1, + "extra_info": { + "job_id": "def72ad2-7f1e-4d39-a2d9-e2e49984dd9a", + "statement_type": "CREATE_TABLE_AS_SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_4", + "table_id": "sharded_table1_20230101" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db_2", + "table_id": "table_from_other_db" + } + ] + } + }, + { + "query": "\n create or replace table `gcp-staging-2.smoke_test_db_4.sharded_table1_20230201` OPTIONS(description=\"Description of 
sharded table ending with _yyyyMMdd\") as (select * from `gcp-staging.smoke_test_db_2.table_from_other_db`);\n ", + "session_id": null, + "timestamp": "2024-08-22 10:28:08.551000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "23d48cd9268ba14ba5274cc16188f5504efa64b022cc83c877f734b69c398d02", + "usage_multiplier": 1, + "extra_info": { + "job_id": "1647e16c-5a81-4b25-852f-ae08e25c18b4", + "statement_type": "CREATE_TABLE_AS_SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_4", + "table_id": "sharded_table1_20230201" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db_2", + "table_id": "table_from_other_db" + } + ] + } + }, + { + "query": "insert into `gcp-staging-2.smoke_test_db_4.sharded_table1_20230201` values (CURRENT_TIMESTAMP(), 100)", + "session_id": null, + "timestamp": "2024-08-22 10:28:11.425000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "0238bbbbabf55ab3543b48a129753ee1726ccce34ab30e79d1eb1278f75723dc", + "usage_multiplier": 1, + "extra_info": { + "job_id": "ede91fe9-615f-4877-86b6-65211b1c16d7", + "statement_type": "INSERT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_4", + "table_id": "sharded_table1_20230201" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_4", + "table_id": "sharded_table1_20230201" + } + ] + } + }, + { + "query": "\n CREATE or REPLACE TABLE\n `gcp-staging-2.smoke_test_db_4.table_with_ingestion_time_partition` (transaction_id INT64)\n PARTITION BY\n _PARTITIONDATE\n OPTIONS (\n description = \"Description of Ingestion time partitioned table\",\n partition_expiration_days = 3,\n require_partition_filter = TRUE);\n ", + "session_id": null, + "timestamp": "2024-08-22 
10:28:15.660000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "3fe79bd659d7bb81ebc0f005eff68e9f3dc037c2383b3109369871192d708a40", + "usage_multiplier": 1, + "extra_info": { + "job_id": "07f61a59-999e-4cad-b5d2-e36f3efc3e1c", + "statement_type": "CREATE_TABLE", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_4", + "table_id": "table_with_ingestion_time_partition" + }, + "referenced_tables": [] + } + }, + { + "query": "\n CREATE or REPLACE TABLE `gcp-staging-2.smoke_test_db_4.table_with_integer_range_partition` (customer_id INT64, date1 DATE)\n PARTITION BY\n RANGE_BUCKET(customer_id, GENERATE_ARRAY(0, 100, 10))\n OPTIONS (\n description = \"Description of Integer Range partitioned table\",\n require_partition_filter = TRUE);\n ", + "session_id": null, + "timestamp": "2024-08-22 10:28:17.080000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "a8567cfec7bc0c2c17aae4266a7d8f01d5f2d3fa720a620ab5bafafc79fa2971", + "usage_multiplier": 1, + "extra_info": { + "job_id": "35c66a87-c24d-4a84-aaa7-02ad67c7924e", + "statement_type": "CREATE_TABLE", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_4", + "table_id": "table_with_integer_range_partition" + }, + "referenced_tables": [] + } + }, + { + "query": "\n CREATE OR REPLACE TABLE `gcp-staging-2.smoke_test_db_4.table_with_nested_fields` (\n id STRING,\n first_name STRING OPTIONS(description = \"First name\"),\n last_name STRING OPTIONS(description = \"Last name\"),\n dob DATE OPTIONS(description = \"Date of birth\"),\n addresses\n ARRAY<\n STRUCT<\n status STRING,\n address STRING OPTIONS(description = \"Full Address\"),\n city STRING,\n state STRING,\n zip STRING,\n numberOfYears STRING>>\n ) OPTIONS (\n description = 'Example name and addresses table');\n ", + 
"session_id": null, + "timestamp": "2024-08-22 10:28:18.418000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "590b097235695f628515f838f96c71e86b2d763bf480ab07a67700023885b250", + "usage_multiplier": 1, + "extra_info": { + "job_id": "6ce6ff3a-80e3-4766-84cf-d285a86ea704", + "statement_type": "CREATE_TABLE", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_4", + "table_id": "table_with_nested_fields" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT last_modified_time\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id=\"abc2023\"\n AND last_modified_time >= 1724236098639\n AND last_modified_time <= 1724322498639\n LIMIT 5\n ;", + "session_id": null, + "timestamp": "2024-08-22 10:28:19.276000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "2c44fb03f02e7c6777499bbd1a0e15b075d39b85c2bb544223f01a9e5ce4656d", + "usage_multiplier": 1, + "extra_info": { + "job_id": "24336324-7714-4ab2-95f4-243f5e54950f", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonf4cce75c_4c8c_4ef2_802e_0f6700b6f877" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "\n create or replace table `gcp-staging-2.smoke_test_db_4.derived_table_from_sharded_table` as (select * from `gcp-staging-2.smoke_test_db_4.sharded_table1_20230101`);\n ", + "session_id": null, + "timestamp": "2024-08-22 10:28:20.148000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "466ee2217419c64f1bdcf7d693317f73a6cbef15abb6424711b990a78d5e3273", + 
"usage_multiplier": 1, + "extra_info": { + "job_id": "0d6a54d2-102a-418f-a8fe-25cfac92abbe", + "statement_type": "CREATE_TABLE_AS_SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_4", + "table_id": "derived_table_from_sharded_table" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_4", + "table_id": "sharded_table1_20230101" + } + ] + } + }, + { + "query": "SELECT COUNT(*) FROM gcp-staging.customer_demo.purchase_event WHERE user_id IS NULL", + "session_id": null, + "timestamp": "2024-08-22 10:28:21.530000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "89b073c2-bf2b-49b1-a983-9589de5ec524", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonb22d0d582afd566e08144b196aec36622576fdf25b6e74a35e044a5b720ce191" + }, + "referenced_tables": [] + } + }, + { + "query": "\n create or replace table `gcp-staging-2.smoke_test_db_4.derived_table_from_wildcard_table` as (select * from `gcp-staging-2.smoke_test_db_4.sharded_table1_*`);\n ", + "session_id": null, + "timestamp": "2024-08-22 10:28:22.689000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "bb9db7b8cca0e55c5fcc8a5ced32d780886c6eb052b022731b33b42f852cc0c9", + "usage_multiplier": 1, + "extra_info": { + "job_id": "2b1929cc-ad64-44f6-9069-e9afbaa3ef2a", + "statement_type": "CREATE_TABLE_AS_SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_4", + "table_id": "derived_table_from_wildcard_table" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_4", + "table_id": "sharded_table1_*" + } + 
] + } + }, + { + "query": "\n SELECT row_count\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id='abc';", + "session_id": null, + "timestamp": "2024-08-22 10:28:23.898000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "81bcba1019e933b2de506faf0f6549b8602519495edc855c0a9267b8d5bb976c", + "usage_multiplier": 1, + "extra_info": { + "job_id": "0a7899cf-c0ac-4e67-8733-ed6374e7f920", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon2f4ead4a_d35d_445d_8819_ed92bc8748cf" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "SELECT AVG(amount) FROM gcp-staging.customer_demo.purchase_event", + "session_id": null, + "timestamp": "2024-08-22 10:28:24.028000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "4f04684e-202e-4afe-b4df-f813d0925999", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon4db3da963edf09d1d3e6b8dddc9f22960bb56016490ef27620acc390fcf18a27" + }, + "referenced_tables": [] + } + }, + { + "query": "SELECT COUNT(*) \nFROM gcp-staging.customer_demo.purchase_event \nWHERE amount < 0", + "session_id": null, + "timestamp": "2024-08-22 10:28:25.181000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "e7014fff-ef9e-4008-88c2-41b5b15c96af", + "statement_type": "SELECT", + "destination_table": { + "project_id": 
"gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon84323ce6f50f99cfc5d2e8ad640f3923743cb41a6a759f18a7fce13c7a358d28" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT row_count\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id='abc';", + "session_id": null, + "timestamp": "2024-08-22 10:28:25.260000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "81bcba1019e933b2de506faf0f6549b8602519495edc855c0a9267b8d5bb976c", + "usage_multiplier": 1, + "extra_info": { + "job_id": "6266127d-3d3b-4393-97f6-0725079c9f0a", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonf31236fd_fb12_4e4d_9bdf_47983e7013f6" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "\n create or replace table `gcp-staging-2.smoke_test_db_4.derived_table_from_external_table` as (select * from `gcp-staging-2.smoke_test_db_4.external_table_us_states`);\n ", + "session_id": null, + "timestamp": "2024-08-22 10:28:25.477000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "bda343fa9d8839d698b5e4c92352f6a2e4f9788c6302a972e1177fe50f8be872", + "usage_multiplier": 1, + "extra_info": { + "job_id": "3f8a74d1-a74b-4757-a1ae-7ee12580be0f", + "statement_type": "CREATE_TABLE_AS_SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_4", + "table_id": "derived_table_from_external_table" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_4", + "table_id": "external_table_us_states" + } + ] + } + }, + { + "query": "\n SELECT last_modified_time\n 
FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id=\"abc\"\n AND last_modified_time >= 1724322464301\n AND last_modified_time <= 1724322504931\n LIMIT 5\n ;", + "session_id": null, + "timestamp": "2024-08-22 10:28:25.508000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "2c44fb03f02e7c6777499bbd1a0e15b075d39b85c2bb544223f01a9e5ce4656d", + "usage_multiplier": 1, + "extra_info": { + "job_id": "b5fa72b4-4bf8-4ff2-86fc-0e6f762160a7", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon1d72692a_44df_4f8c_9b1c_6aae8a38a887" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "\n SELECT COUNT(*)\n FROM gcp-staging.customer_demo.purchase_event\n WHERE quantity < 0\n ", + "session_id": null, + "timestamp": "2024-08-22 10:28:26.413000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "74775f70-cbaa-43b5-8136-a9185f5cec66", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon32e6bd0458339a10581928d131db191eaea3e29936a3454e7946ed8a12f17e38" + }, + "referenced_tables": [] + } + }, + { + "query": "SELECT AVG(amount) FROM gcp-staging.customer_demo.purchase_event", + "session_id": null, + "timestamp": "2024-08-22 10:28:26.413000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "dcd0881c-4027-4585-ae64-95e2efc891a1", + "statement_type": 
"SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon4db3da963edf09d1d3e6b8dddc9f22960bb56016490ef27620acc390fcf18a27" + }, + "referenced_tables": [] + } + }, + { + "query": "SELECT COUNT(*) FROM gcp-staging.customer_demo.purchase_event WHERE user_id IS NULL", + "session_id": null, + "timestamp": "2024-08-22 10:28:26.850000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "64aadd1d-2596-4600-9452-f67af261c0ea", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonb22d0d582afd566e08144b196aec36622576fdf25b6e74a35e044a5b720ce191" + }, + "referenced_tables": [] + } + }, + { + "query": "SELECT COUNT(*) \nFROM gcp-staging.customer_demo.purchase_event \nWHERE amount < 0", + "session_id": null, + "timestamp": "2024-08-22 10:28:27.362000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "8dfe3671-cfac-4bbf-bf17-325bd9a13aa0", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon84323ce6f50f99cfc5d2e8ad640f3923743cb41a6a759f18a7fce13c7a358d28" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT last_modified_time\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id=\"abc2023\"\n AND last_modified_time >= 1724236106863\n AND last_modified_time <= 1724322506863\n LIMIT 5\n ;", + "session_id": null, + "timestamp": "2024-08-22 10:28:27.443000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + 
"default_schema": "_SESSION", + "query_hash": "2c44fb03f02e7c6777499bbd1a0e15b075d39b85c2bb544223f01a9e5ce4656d", + "usage_multiplier": 1, + "extra_info": { + "job_id": "9b8500cf-1c04-4c1e-8a13-e15577e67641", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonc9084373_367a_4390_af54_16f619797096" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "SELECT COUNT(*) \nFROM gcp-staging.customer_demo.purchase_event \nWHERE amount < 0", + "session_id": null, + "timestamp": "2024-08-22 10:28:28.030000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "1c014139-8f13-46b9-b440-27259f3b7701", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon84323ce6f50f99cfc5d2e8ad640f3923743cb41a6a759f18a7fce13c7a358d28" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT COUNT(*)\n FROM gcp-staging.customer_demo.purchase_event\n WHERE amount < 0\n ", + "session_id": null, + "timestamp": "2024-08-22 10:28:28.141000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "a8a83a53-e200-4205-8e30-73d11c149e44", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon60e011141b8e3c85fb386e55d995a8290fc3a0896c26ac6d5717f14f5f163947" + }, + "referenced_tables": [] + } + }, + { + "query": "\n create or replace table 
`gcp-staging-2.smoke_test_db_4.derived_table_from_timetravelled_table` as (\n SELECT *\n FROM `gcp-staging.smoke_test_db.base_table`\n FOR SYSTEM_TIME AS OF TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 HOUR)\n );\n ", + "session_id": null, + "timestamp": "2024-08-22 10:28:28.214000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "d8fba9ffc7dad8c86eded34dcaf16829629501451411380ec17084a1e2d38432", + "usage_multiplier": 1, + "extra_info": { + "job_id": "e4aa2fb4-56b5-4515-a50d-75ec31794ff2", + "statement_type": "CREATE_TABLE_AS_SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_4", + "table_id": "derived_table_from_timetravelled_table" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table@1724318908218" + } + ] + } + }, + { + "query": "CREATE TABLE `gcp-staging-2.smoke_test_db_4.destination_table_of_select_query` AS\n (\n SELECT * FROM `gcp-staging.smoke_test_db.base_table`\n )", + "session_id": null, + "timestamp": "2024-08-22 10:28:30.656000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "e9226e326a20c90425b04cd42358663c8446312d0d8dadf78b177ab708d92343", + "usage_multiplier": 1, + "extra_info": { + "job_id": "6486765d-2753-477f-9e11-cfcfb6407c8c", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_4", + "table_id": "destination_table_of_select_query" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "materialized_view_from_table" + }, + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table" + } + ] + } + }, + { + "query": "\n SELECT COUNT(*)\n FROM gcp-staging.customer_demo.purchase_event\n WHERE quantity 
< 0\n ", + "session_id": null, + "timestamp": "2024-08-22 10:28:31.305000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "d02b0ba9-0c04-47ce-82ae-dd2686458739", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon32e6bd0458339a10581928d131db191eaea3e29936a3454e7946ed8a12f17e38" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT row_count\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id='abc';", + "session_id": null, + "timestamp": "2024-08-22 10:28:32.614000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "81bcba1019e933b2de506faf0f6549b8602519495edc855c0a9267b8d5bb976c", + "usage_multiplier": 1, + "extra_info": { + "job_id": "a71ddace-2595-4b3f-a677-258b46409f65", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anone859c132_30c8_476f_8be3_8f7882e8c4d7" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "SELECT AVG(amount) FROM gcp-staging.customer_demo.purchase_event", + "session_id": null, + "timestamp": "2024-08-22 10:28:33.038000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "f78b5ce5-713b-4898-b13f-91b6304c2cce", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": 
"anon4db3da963edf09d1d3e6b8dddc9f22960bb56016490ef27620acc390fcf18a27" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT last_modified_time\n FROM gcp-staging.customer_demo.__TABLES__\n WHERE table_id=\"purchase_event\"\n AND last_modified_time >= 1692786512601\n AND last_modified_time <= 1724322512601\n LIMIT 5\n ;", + "session_id": null, + "timestamp": "2024-08-22 10:28:33.155000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "c4076a26606e9f6d6b9fde2855758ba67b1155b110a65f72cf1999fa1facdfa4", + "usage_multiplier": 1, + "extra_info": { + "job_id": "87f27258-3890-47d3-97da-75151c67d647", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon6599dcd6_73ba_42e7_b0a3_d9a5985a4ddd" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "customer_demo", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "select revenue, date_utc from gcp-staging.smoke_test_db.base_table", + "session_id": null, + "timestamp": "2024-08-22 10:28:34.004000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "376701d9b1677febe3189c534c475310e79dd893ac7759e42f59433240d3a83b", + "usage_multiplier": 1, + "extra_info": { + "job_id": "139c0fa9-cc67-4ec4-85cf-1b9a231f4179", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonac9eceea_3ec8_4740_905e_eeab3bc9b91d" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table" + }, + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "materialized_view_from_table" + } + ] + } + }, + { + "query": "\n SELECT 
last_modified_time\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id=\"abc2023\"\n AND last_modified_time >= 1724236114344\n AND last_modified_time <= 1724322514344\n LIMIT 5\n ;", + "session_id": null, + "timestamp": "2024-08-22 10:28:34.896000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "2c44fb03f02e7c6777499bbd1a0e15b075d39b85c2bb544223f01a9e5ce4656d", + "usage_multiplier": 1, + "extra_info": { + "job_id": "9f864017-5753-4a2e-b2cc-53ac789dd9ea", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonf6b6eab6_7da6_451b_923e_cd310a5df0eb" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "SELECT COUNT(*) FROM gcp-staging.customer_demo.purchase_event WHERE user_id IS NULL", + "session_id": null, + "timestamp": "2024-08-22 10:28:36.387000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "a915c600-d661-432c-9236-df37a7c65c1e", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonb22d0d582afd566e08144b196aec36622576fdf25b6e74a35e044a5b720ce191" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT row_count\n FROM gcp-staging.customer_demo.__TABLES__\n WHERE table_id='purchase_event';", + "session_id": null, + "timestamp": "2024-08-22 10:28:36.511000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "8aed7604ce6319236cca0b0f7d16614774821c2666f5cf6f5be2cec702026d64", 
+ "usage_multiplier": 1, + "extra_info": { + "job_id": "8d362768-c6e9-42ee-bbaf-3b114a32d827", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon3c7a5401_2635_4e79_b452_21ede12dacf2" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "customer_demo", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "\n SELECT COUNT(*)\n FROM gcp-staging.customer_demo.purchase_event\n WHERE amount < 0\n ", + "session_id": null, + "timestamp": "2024-08-22 10:28:37.586000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "450d30ee-719e-43c2-b9f0-4ef252080b54", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon60e011141b8e3c85fb386e55d995a8290fc3a0896c26ac6d5717f14f5f163947" + }, + "referenced_tables": [] + } + }, + { + "query": "SELECT\n (SUM(CASE WHEN amount BETWEEN 0 AND 10 THEN 1 ELSE 0 END) * 100.0) / COUNT(*) AS percentage_in_range\nFROM\n gcp-staging.customer_demo.purchase_event;", + "session_id": null, + "timestamp": "2024-08-22 10:28:38.119000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "43486ae9-5d58-47a0-98e0-7a064aa634b5", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anondaaf8a98bb16f9b4fd4e7f6ada08bac86581b8e2a25e55399914c155fd8566b7" + }, + "referenced_tables": [] + } + }, + { + "query": "select revenue, date_utc from gcp-staging.smoke_test_db.partition_test", + "session_id": null, + 
"timestamp": "2024-08-22 10:28:38.956000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "43b7b24e1b461dd151b2fa4f3e9ff9d4f03f831dfff20b1d7016686d9ac29e7c", + "usage_multiplier": 1, + "extra_info": { + "job_id": "12049165-b270-4f94-9531-03542fc9f680", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anond6b394e5_9ab5_4d08_a48c_9826e78e05ef" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "partition_test" + } + ] + } + }, + { + "query": "select revenue, date_utc from gcp-staging.smoke_test_db.lineage_from_tmp_table", + "session_id": null, + "timestamp": "2024-08-22 10:28:42.802000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "a5460497edcdd62d40807fd1810005e02b13b930fe560570e16a6a652854390b", + "usage_multiplier": 1, + "extra_info": { + "job_id": "477cc9dd-ffca-4352-a8ed-ebd1e7c54967", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon00c98d0b_6cbc_4752_8a0b_d0f5bff232fb" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "lineage_from_tmp_table" + } + ] + } + }, + { + "query": "select revenue, date_utc from gcp-staging.smoke_test_db.lineage_from_base", + "session_id": null, + "timestamp": "2024-08-22 10:28:45.871000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "2a4f4d44119c98882739e7d1360a5544c0a4b95a45198f789ac1c6efdf3d7e10", + "usage_multiplier": 1, + "extra_info": { + "job_id": "6c29d65a-fb33-4805-bd32-4a9e00ceee68", + "statement_type": 
"SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon37cb5eae_6fb4_4f80_a2b5_dcdf18293e5b" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "lineage_from_base" + } + ] + } + }, + { + "query": "select revenue, date_utc from gcp-staging.smoke_test_db.view_from_table", + "session_id": null, + "timestamp": "2024-08-22 10:28:48.915000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "c039e0e092bcd3150010cddee2a172af283951c43db68caf513593e4f552be28", + "usage_multiplier": 1, + "extra_info": { + "job_id": "6f142a0e-d71c-40f7-9b76-70080d55638f", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon0026b3be_d07b_4302_b7ca_fe6c2b3ce150" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table" + }, + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "materialized_view_from_table" + } + ] + } + }, + { + "query": "select revenue, date_utc from gcp-staging.smoke_test_db.view_from_view_on_table", + "session_id": null, + "timestamp": "2024-08-22 10:28:52.005000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "3dd24342ec0b26c692285908192e7eb6ad5e38f25816bce80405c57acc109c0f", + "usage_multiplier": 1, + "extra_info": { + "job_id": "6b9755dd-e5e4-4c06-b850-72b02a0c583d", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon264bee95_5a2e_411d_971d_3b15c65475c0" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + 
"dataset_id": "smoke_test_db", + "table_id": "materialized_view_from_table" + }, + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table" + } + ] + } + }, + { + "query": "select revenue, date_utc from gcp-staging.smoke_test_db.table_from_view", + "session_id": null, + "timestamp": "2024-08-22 10:28:55.333000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "4c08bebf0f438dcb4fda1eb2cb02e41ff216ee613eb9ffcc8a9e01dffd1f5d85", + "usage_multiplier": 1, + "extra_info": { + "job_id": "978ee15e-fc5f-4464-a288-4a588d10a72b", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon57c90059_bdfe_4f67_a482_8423f135abd5" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "table_from_view" + } + ] + } + }, + { + "query": "select revenue, date_utc from gcp-staging.smoke_test_db.table_from_view_and_table", + "session_id": null, + "timestamp": "2024-08-22 10:28:58.350000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "fc92290f2da155cba009370357576469c7262cff1fd4b521399e8b555fb9457e", + "usage_multiplier": 1, + "extra_info": { + "job_id": "762a642a-9d71-4ffd-a0af-f1a6f0843643", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon32470674_7f5e_4d81_875c_08afdc7ee994" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "table_from_view_and_table" + } + ] + } + }, + { + "query": "SELECT COUNT(*) \nFROM gcp-staging.customer_demo.purchase_event \nWHERE amount < 0", + "session_id": null, + "timestamp": "2024-08-22 
10:29:00.852000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "0ce60fad-0590-4b9d-a333-c0651033c162", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon84323ce6f50f99cfc5d2e8ad640f3923743cb41a6a759f18a7fce13c7a358d28" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT row_count\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id='abc';", + "session_id": null, + "timestamp": "2024-08-22 10:29:01.249000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "81bcba1019e933b2de506faf0f6549b8602519495edc855c0a9267b8d5bb976c", + "usage_multiplier": 1, + "extra_info": { + "job_id": "9e9dcf86-6712-4981-a7a8-7abff832a52b", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon4765d310_ebab_49c6_8edb_2a8b5610bc41" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "select revenue, date_utc from gcp-staging.smoke_test_db.materialized_view_from_table", + "session_id": null, + "timestamp": "2024-08-22 10:29:01.401000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "24ffb91f53691b2b9082fc4582d5836ec8501613821b7de7dda75b0b7b918881", + "usage_multiplier": 1, + "extra_info": { + "job_id": "cd5b5b95-41ad-4451-b9c8-f7eedc134b22", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + 
"table_id": "anon2173da01_0669_4d20_9814_155ccaf243a7" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table" + }, + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "materialized_view_from_table" + } + ] + } + }, + { + "query": "SELECT AVG(amount) FROM gcp-staging.customer_demo.purchase_event", + "session_id": null, + "timestamp": "2024-08-22 10:29:01.448000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "ed85e89b-173a-4acc-8e38-56ac85b501a9", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon4db3da963edf09d1d3e6b8dddc9f22960bb56016490ef27620acc390fcf18a27" + }, + "referenced_tables": [] + } + }, + { + "query": "select revenue, date_utc from gcp-staging.smoke_test_db.view_from_multiple_tables", + "session_id": null, + "timestamp": "2024-08-22 10:29:04.870000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "58cdaed9da9d3cd837048b70d7d48fa485483cb5e85755803feb28c30aefc800", + "usage_multiplier": 1, + "extra_info": { + "job_id": "cb16a84f-23e3-48f1-9cbd-cd6c1ae2ccdb", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anone6b9ea5f_d7b2_4d88_bce8_67813e1297be" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db_2", + "table_id": "table_from_other_db" + }, + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "lineage_from_base" + }, + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": 
"materialized_view_from_table" + }, + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table" + } + ] + } + }, + { + "query": "select revenue, date_utc from gcp-staging.smoke_test_db_2.table_from_other_db", + "session_id": null, + "timestamp": "2024-08-22 10:29:08.392000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "8073f5c704e675d8c4383a750c2c88da65437a38e28a1f7f06f1162531d544a9", + "usage_multiplier": 1, + "extra_info": { + "job_id": "13b0be99-04e9-40e9-80fb-3ca54815bcc0", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon02c064af_8ff6_41f5_9a59_ad6b45254529" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db_2", + "table_id": "table_from_other_db" + } + ] + } + }, + { + "query": "select revenue, date_utc from gcp-staging-2.smoke_test_db_3.base_table_2", + "session_id": null, + "timestamp": "2024-08-22 10:29:11.240000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "cad2086113da8705258cfb5e0531e75d26aeccc85ddbf774c18121bb8716af7a", + "usage_multiplier": 1, + "extra_info": { + "job_id": "ab261d82-f178-44bc-8f16-135a2b7698f0", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon0bf48c7f_6413_4559_9fdd_dc4f0be48c47" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_3", + "table_id": "base_table_2" + } + ] + } + }, + { + "query": "SELECT COUNT(*) FROM gcp-staging.customer_demo.purchase_event WHERE user_id IS NULL", + "session_id": null, + "timestamp": "2024-08-22 10:29:11.898000+00:00", + "user": 
"urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "107b0124-7754-4d72-8e50-3ed562afdeb7", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonb22d0d582afd566e08144b196aec36622576fdf25b6e74a35e044a5b720ce191" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT COUNT(*)\n FROM gcp-staging.customer_demo.purchase_event\n WHERE amount < 0\n ", + "session_id": null, + "timestamp": "2024-08-22 10:29:13.829000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "be2fc0f0-13be-44ad-a77c-2d81391f206c", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon60e011141b8e3c85fb386e55d995a8290fc3a0896c26ac6d5717f14f5f163947" + }, + "referenced_tables": [] + } + }, + { + "query": "select revenue, date_utc from gcp-staging.smoke_test_db.table_from_another_project", + "session_id": null, + "timestamp": "2024-08-22 10:29:14.152000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "22c18f1bdf759bab7dcd2f26b3b66cc62ce9a7aa861237a5a790437958bf5316", + "usage_multiplier": 1, + "extra_info": { + "job_id": "573cbacd-aee3-4655-a44c-1d721bcb49ed", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonc7614baf_c661_4d17_aba2_f7b43763a7e2" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "table_from_another_project" + } + ] 
+ } + }, + { + "query": "\n SELECT last_modified_time\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id=\"abc2023\"\n AND last_modified_time >= 1724236154390\n AND last_modified_time <= 1724322554390\n LIMIT 5\n ;", + "session_id": null, + "timestamp": "2024-08-22 10:29:14.876000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "2c44fb03f02e7c6777499bbd1a0e15b075d39b85c2bb544223f01a9e5ce4656d", + "usage_multiplier": 1, + "extra_info": { + "job_id": "7a627727-a4da-45fa-b504-147ea8f7fcf8", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonf45b3a81_0bc1_413b_b39f_04a62fdf41b7" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "select revenue, date_utc from gcp-staging-2.smoke_test_db_4.sharded_table1_20230101", + "session_id": null, + "timestamp": "2024-08-22 10:29:17.063000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "691a01378badc5f66308c9cd67305ff487ab5b1208c9cef4fd0e7b0d12401e72", + "usage_multiplier": 1, + "extra_info": { + "job_id": "a2089528-786c-4cde-b87b-61a88a17d7fa", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anone3b99b14_5cf1_4a38_97a5_b435d004fcc8" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_4", + "table_id": "sharded_table1_20230101" + } + ] + } + }, + { + "query": "\n SELECT COUNT(*)\n FROM gcp-staging.customer_demo.purchase_event\n WHERE amount < 0\n ", + "session_id": null, + "timestamp": "2024-08-22 10:29:17.577000+00:00", + "user": 
"urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "9f167fab-f7db-4cce-82d7-8e97c890d9ec", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon60e011141b8e3c85fb386e55d995a8290fc3a0896c26ac6d5717f14f5f163947" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT COUNT(*)\n FROM gcp-staging.customer_demo.purchase_event\n WHERE quantity < 0\n ", + "session_id": null, + "timestamp": "2024-08-22 10:29:18.636000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "3098ddd6-b6a3-45a2-9905-8521bea14486", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon32e6bd0458339a10581928d131db191eaea3e29936a3454e7946ed8a12f17e38" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT last_modified_time\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id=\"abc2023\"\n AND last_modified_time >= 1724236157888\n AND last_modified_time <= 1724322557888\n LIMIT 5\n ;", + "session_id": null, + "timestamp": "2024-08-22 10:29:18.676000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "2c44fb03f02e7c6777499bbd1a0e15b075d39b85c2bb544223f01a9e5ce4656d", + "usage_multiplier": 1, + "extra_info": { + "job_id": "91631d91-6208-4f60-9ba7-8cfe67cb4185", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anoncaea99c4_c2ca_465a_984a_07a359657b86" + }, + 
"referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "SELECT COUNT(*) FROM gcp-staging.customer_demo.purchase_event WHERE user_id IS NULL", + "session_id": null, + "timestamp": "2024-08-22 10:29:18.692000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "f93e3148-9c20-4ccb-b7a5-f5608a39a877", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonb22d0d582afd566e08144b196aec36622576fdf25b6e74a35e044a5b720ce191" + }, + "referenced_tables": [] + } + }, + { + "query": "select revenue, date_utc from gcp-staging-2.smoke_test_db_4.sharded_table1_20230201", + "session_id": null, + "timestamp": "2024-08-22 10:29:20.058000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "f1d54aed9c1d03837b735a53385cb93c1eabff91aa33fa99dfa8d9e7e5ec1ff7", + "usage_multiplier": 1, + "extra_info": { + "job_id": "8e1d105a-52f3-46f9-a4b5-4baa374e4daf", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon01dc59e0_2426_4a26_aa38_abe2a1c214ec" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_4", + "table_id": "sharded_table1_20230201" + } + ] + } + }, + { + "query": "\n SELECT last_modified_time\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id=\"abc2023\"\n AND last_modified_time >= 1724236161244\n AND last_modified_time <= 1724322561244\n LIMIT 5\n ;", + "session_id": null, + "timestamp": "2024-08-22 10:29:21.866000+00:00", + "user": 
"urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "2c44fb03f02e7c6777499bbd1a0e15b075d39b85c2bb544223f01a9e5ce4656d", + "usage_multiplier": 1, + "extra_info": { + "job_id": "6177dad2-5bb9-4512-84da-5a606a8ecde0", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anona85c4226_4415_4833_bd70_f3509f3b647d" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "\n SELECT COUNT(*)\n FROM gcp-staging.customer_demo.purchase_event\n WHERE amount < 0\n ", + "session_id": null, + "timestamp": "2024-08-22 10:29:22.992000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "f7eac38f-48ec-4f63-8879-1a0ce02e9474", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon60e011141b8e3c85fb386e55d995a8290fc3a0896c26ac6d5717f14f5f163947" + }, + "referenced_tables": [] + } + }, + { + "query": "select revenue, date_utc from `gcp-staging-2.smoke_test_db_4.sharded_table1_*`", + "session_id": null, + "timestamp": "2024-08-22 10:29:23.212000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "a394d7e82168ab8dd90e6ff402fa06f3a3043be775803f3831477534a444e421", + "usage_multiplier": 1, + "extra_info": { + "job_id": "a5707bee-8e81-46a3-a714-9c65347f2258", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonf0916716_a817_4feb_9357_09b7219c0890" + }, 
+ "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_4", + "table_id": "sharded_table1_*" + } + ] + } + }, + { + "query": "\n select revenue from gcp-staging.smoke_test_db.partition_test t\n where cast(t.date_utc as DATE) < (select max(date_utc) from gcp-staging.smoke_test_db.partition_test)\n", + "session_id": null, + "timestamp": "2024-08-22 10:29:26.682000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "3a774cee689b0b7d4d3d1268ec5a7ea5c32f5ae16be5b452786b82b528599e74", + "usage_multiplier": 1, + "extra_info": { + "job_id": "0b0ac8b9-c9a8-461a-99b1-4408c2468e3d", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon128ef087_3e96_433e_a49c_7c3dbeda89f9" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "partition_test" + } + ] + } + }, + { + "query": "\n select extract(month from date_utc) month, count(*) count, sum(revenue) gross from gcp-staging.smoke_test_db.partition_test\n group by month\n order by gross\n", + "session_id": null, + "timestamp": "2024-08-22 10:29:28.369000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "9b6d48846141019d32c4c9f2660f49c8ea9659ef1c530fa14e3e1b831553d100", + "usage_multiplier": 1, + "extra_info": { + "job_id": "e7455bfd-9fea-409c-82a0-259088e942a4", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon441d7e56_e71e_4184_b282_50122f28ab8f" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "partition_test" + } + ] + } + }, + { + "query": "\n select revenue from 
gcp-staging.smoke_test_db.partition_test t\n where cast(t.date_utc as DATE) < (select max(date_utc) from gcp-staging.smoke_test_db.partition_test)\n", + "session_id": null, + "timestamp": "2024-08-22 10:29:29.856000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "3a774cee689b0b7d4d3d1268ec5a7ea5c32f5ae16be5b452786b82b528599e74", + "usage_multiplier": 1, + "extra_info": { + "job_id": "032ad33c-8d21-4fe0-8013-e7f4bfef7b91", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anona0ecfb8a_d416_41c5_92c7_ddeb71cc3872" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "partition_test" + } + ] + } + }, + { + "query": "\n select extract(month from date_utc) month, count(*) count, sum(revenue) gross from gcp-staging.smoke_test_db.partition_test\n group by month\n order by gross\n", + "session_id": null, + "timestamp": "2024-08-22 10:29:31.415000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "9b6d48846141019d32c4c9f2660f49c8ea9659ef1c530fa14e3e1b831553d100", + "usage_multiplier": 1, + "extra_info": { + "job_id": "b7eab270-0938-40f2-b2c7-9608feb71219", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon1f7efd21_bc80_4a18_b307_46415df75599" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "partition_test" + } + ] + } + }, + { + "query": "\n select revenue from gcp-staging.smoke_test_db.partition_test t\n where cast(t.date_utc as DATE) < (select max(date_utc) from gcp-staging.smoke_test_db.partition_test)\n", + "session_id": null, + "timestamp": 
"2024-08-22 10:29:33.082000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "3a774cee689b0b7d4d3d1268ec5a7ea5c32f5ae16be5b452786b82b528599e74", + "usage_multiplier": 1, + "extra_info": { + "job_id": "68f63699-3ed3-4b36-b956-a38f9f72d721", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon2a405ce1_4a34_45c2_b741_eb862f9bd688" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "partition_test" + } + ] + } + }, + { + "query": "\n select extract(month from date_utc) month, count(*) count, sum(revenue) gross from gcp-staging.smoke_test_db.partition_test\n group by month\n order by gross\n", + "session_id": null, + "timestamp": "2024-08-22 10:29:34.811000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "9b6d48846141019d32c4c9f2660f49c8ea9659ef1c530fa14e3e1b831553d100", + "usage_multiplier": 1, + "extra_info": { + "job_id": "83e0e55f-be23-422c-a741-88253937e178", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anona86a38fe_7ba8_4625_bd80_08b83836dc74" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "partition_test" + } + ] + } + }, + { + "query": "\n select revenue from gcp-staging.smoke_test_db.partition_test t\n where cast(t.date_utc as DATE) < (select max(date_utc) from gcp-staging.smoke_test_db.partition_test)\n", + "session_id": null, + "timestamp": "2024-08-22 10:29:36.374000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": 
"3a774cee689b0b7d4d3d1268ec5a7ea5c32f5ae16be5b452786b82b528599e74", + "usage_multiplier": 1, + "extra_info": { + "job_id": "98c3b073-7686-4c4e-9f12-fb18d9d084f5", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonced0b673_cde3_4a7e_85fb_b14a3f15729d" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "partition_test" + } + ] + } + }, + { + "query": "\n select extract(month from date_utc) month, count(*) count, sum(revenue) gross from gcp-staging.smoke_test_db.partition_test\n group by month\n order by gross\n", + "session_id": null, + "timestamp": "2024-08-22 10:29:37.890000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "9b6d48846141019d32c4c9f2660f49c8ea9659ef1c530fa14e3e1b831553d100", + "usage_multiplier": 1, + "extra_info": { + "job_id": "aa89ab39-11e7-45ce-b9fd-181e8f16843d", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon551db81a_c63c_40b3_b8d5_4ecceabe1c68" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "partition_test" + } + ] + } + }, + { + "query": "\n select revenue from gcp-staging.smoke_test_db.partition_test t\n where cast(t.date_utc as DATE) < (select max(date_utc) from gcp-staging.smoke_test_db.partition_test)\n", + "session_id": null, + "timestamp": "2024-08-22 10:29:39.451000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "3a774cee689b0b7d4d3d1268ec5a7ea5c32f5ae16be5b452786b82b528599e74", + "usage_multiplier": 1, + "extra_info": { + "job_id": "c955f68d-7d2e-4c8d-978b-1b51496390d2", + "statement_type": "SELECT", + 
"destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anone845e07d_3c57_4dad_930c_c9c2eb4e27ad" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "partition_test" + } + ] + } + }, + { + "query": "\n select extract(month from date_utc) month, count(*) count, sum(revenue) gross from gcp-staging.smoke_test_db.partition_test\n group by month\n order by gross\n", + "session_id": null, + "timestamp": "2024-08-22 10:29:41.032000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "9b6d48846141019d32c4c9f2660f49c8ea9659ef1c530fa14e3e1b831553d100", + "usage_multiplier": 1, + "extra_info": { + "job_id": "4d279f45-7b97-45f8-a263-88e2d8f31bf0", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon6b4e215f_b542_4941_9274_3ba107aa4955" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "partition_test" + } + ] + } + }, + { + "query": "select value from gcp-staging.smoke_test_db.usage_test", + "session_id": null, + "timestamp": "2024-08-22 10:29:47.575000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "18deb4c7e128343b7ecc2cf59b9c9a06f96d2ce4e66b33bcd2b5c26899d6b9b5", + "usage_multiplier": 1, + "extra_info": { + "job_id": "1bea24f3-644e-4dd4-98d4-5f74d3d3e6d3", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon91770d44_60b5_4727_8f2c_1a4f7ab92866" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "usage_test" + } + ] + } + 
}, + { + "query": "select value from gcp-staging.smoke_test_db.usage_test", + "session_id": null, + "timestamp": "2024-08-22 10:29:50.777000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "18deb4c7e128343b7ecc2cf59b9c9a06f96d2ce4e66b33bcd2b5c26899d6b9b5", + "usage_multiplier": 1, + "extra_info": { + "job_id": "1d670ea7-1e10-4952-8beb-5fe77bb8f22e", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonb0bc3965_5bee_4284_92f8_32e838db3f2a" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "usage_test" + } + ] + } + }, + { + "query": "select value from gcp-staging.smoke_test_db.usage_test", + "session_id": null, + "timestamp": "2024-08-22 10:29:54.074000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "18deb4c7e128343b7ecc2cf59b9c9a06f96d2ce4e66b33bcd2b5c26899d6b9b5", + "usage_multiplier": 1, + "extra_info": { + "job_id": "3eb125df-feca-44c0-9896-326b6c34a3bf", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anone59747e9_b4ff_4ccc_b8ba_f3f92eb85f44" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "usage_test" + } + ] + } + }, + { + "query": "select * from gcp-staging.smoke_test_db.usage_test", + "session_id": null, + "timestamp": "2024-08-22 10:29:57.066000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "553f645d24478a4dd699b8deba3a900f3a8907f53105a104f543d08aaef22930", + "usage_multiplier": 1, + "extra_info": { + "job_id": 
"13e496a2-3160-4bb2-b5d6-b81d61303d8a", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon411f6440_ee8b_4fbe_83f1_e4c99b930da7" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "usage_test" + } + ] + } + }, + { + "query": "SELECT COUNT(*) FROM gcp-staging.customer_demo.purchase_event WHERE user_id IS NULL", + "session_id": null, + "timestamp": "2024-08-22 10:29:58.963000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "19af6f98-b709-47dc-8358-14e2dcc91894", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonb22d0d582afd566e08144b196aec36622576fdf25b6e74a35e044a5b720ce191" + }, + "referenced_tables": [] + } + }, + { + "query": "select * from gcp-staging.smoke_test_db.usage_test", + "session_id": null, + "timestamp": "2024-08-22 10:30:00.076000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "553f645d24478a4dd699b8deba3a900f3a8907f53105a104f543d08aaef22930", + "usage_multiplier": 1, + "extra_info": { + "job_id": "aa6811f2-bfea-4ec3-a7b5-9d70f0475706", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon42e59724_9567_47eb_aa95_c1ec7418902b" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "usage_test" + } + ] + } + }, + { + "query": "SELECT COUNT(*) FROM gcp-staging.customer_demo.purchase_event WHERE user_id IS NULL", + "session_id": null, + "timestamp": "2024-08-22 
10:30:01.034000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "152ed309-796b-4177-b082-82010a4f7e08", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonb22d0d582afd566e08144b196aec36622576fdf25b6e74a35e044a5b720ce191" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT row_count\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id='abc';", + "session_id": null, + "timestamp": "2024-08-22 10:30:02.137000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "81bcba1019e933b2de506faf0f6549b8602519495edc855c0a9267b8d5bb976c", + "usage_multiplier": 1, + "extra_info": { + "job_id": "a1a22c15-873b-453f-80d0-b82b6391d360", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonc5692ab6_b9b1_4294_adaf_9bc23e242801" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "SELECT AVG(amount) FROM gcp-staging.customer_demo.purchase_event", + "session_id": null, + "timestamp": "2024-08-22 10:30:02.168000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "4e603b0a-2c6d-4c32-acde-0646f680a84f", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon4db3da963edf09d1d3e6b8dddc9f22960bb56016490ef27620acc390fcf18a27" + }, + 
"referenced_tables": [] + } + }, + { + "query": "SELECT COUNT(*) \nFROM gcp-staging.customer_demo.purchase_event \nWHERE amount < 0", + "session_id": null, + "timestamp": "2024-08-22 10:30:02.202000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "8c807377-da03-4a71-bb46-a191792ff0bf", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon84323ce6f50f99cfc5d2e8ad640f3923743cb41a6a759f18a7fce13c7a358d28" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT last_modified_time\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id=\"abc\"\n AND last_modified_time >= 1724322506419\n AND last_modified_time <= 1724322600205\n LIMIT 5\n ;", + "session_id": null, + "timestamp": "2024-08-22 10:30:02.282000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "2c44fb03f02e7c6777499bbd1a0e15b075d39b85c2bb544223f01a9e5ce4656d", + "usage_multiplier": 1, + "extra_info": { + "job_id": "0502a1a5-bb81-4926-96ca-c6dbf496f3d7", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon02406632_13ec_4bb5_8272_9241dc8485cf" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "\n select extract(month from date_utc) month, count(*) count, sum(value) total from gcp-staging.smoke_test_db.usage_test\n group by month\n order by total\n ", + "session_id": null, + "timestamp": "2024-08-22 10:30:02.823000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + 
"default_schema": "_SESSION", + "query_hash": "2619b42bcfd850c2f6df3a72e3b4e53a635a7169d09facdfa4269366f3a48280", + "usage_multiplier": 1, + "extra_info": { + "job_id": "2f1c758f-62a5-45ba-9d2e-769bed74a333", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon8af21009_09ce_4944_a5ad_6a57c73352f1" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "usage_test" + } + ] + } + }, + { + "query": "\n select name, post_abbr from `gcp-staging-2.smoke_test_db_4.external_table_us_states`\n", + "session_id": null, + "timestamp": "2024-08-22 10:30:05.248000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "508134047ff2cc6a25ed279ebf1541b1681e0cb7eb22e1cfacd9fd5a51c8efd6", + "usage_multiplier": 1, + "extra_info": { + "job_id": "2e204e78-a2a1-4b5f-90f2-a0a6662cd4b1", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon28991a9f_ae0c_4016_b1b7_7d9346528fb8" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_4", + "table_id": "external_table_us_states" + } + ] + } + }, + { + "query": "\n SELECT\n first_name,\n last_name,\n dob,\n addresses[offset(0)].address,\n addresses[offset(0)].city\n FROM \n gcp-staging-2.smoke_test_db_4.table_with_nested_fields\n", + "session_id": null, + "timestamp": "2024-08-22 10:30:06.806000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "71b4e0082e4005e45c1e32199147cc5d02491a45c225065c8bbdc6576f4724ea", + "usage_multiplier": 1, + "extra_info": { + "job_id": "3305dfa5-708c-4955-b240-0657bcddaa04", + "statement_type": "SELECT", + 
"destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonfcea9c21_50a4_41c2_82bc_31e8d23874bd" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_4", + "table_id": "table_with_nested_fields" + } + ] + } + }, + { + "query": "\n select \n transaction_id \n from \n `gcp-staging-2.smoke_test_db_4.table_with_ingestion_time_partition` \n where \n _PARTITIONDATE = CURRENT_DATE()\n", + "session_id": null, + "timestamp": "2024-08-22 10:30:08.186000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "6711368fa67682497ba5ff5dbbce9302950b990da8951d52f83a59385e22e9ca", + "usage_multiplier": 1, + "extra_info": { + "job_id": "878f73b2-9ff7-4d40-b8c4-81c72de46922", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon385fe8ae_2321_4782_ac22_9be1ec64a4a2" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_4", + "table_id": "table_with_ingestion_time_partition" + } + ] + } + }, + { + "query": "\n SELECT\n customer_id,\n date1\n FROM\n `gcp-staging-2.smoke_test_db_4.table_with_integer_range_partition`\n WHERE\n customer_id=1\n", + "session_id": null, + "timestamp": "2024-08-22 10:30:09.673000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "6690a922f76f680ab004c39b375979b610546bb4fd20d498bb4a83c94ce2d476", + "usage_multiplier": 1, + "extra_info": { + "job_id": "cf03003b-4142-461f-b199-2edb45cda9bf", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anondcc3a621_6750_4415_95c6_7af5662e95cb" + }, + "referenced_tables": [ 
+ { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_4", + "table_id": "table_with_integer_range_partition" + } + ] + } + }, + { + "query": "select revenue, date_utc from gcp-staging.smoke_test_db.base_table FOR SYSTEM_TIME AS OF TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 HOUR)", + "session_id": null, + "timestamp": "2024-08-22 10:30:11.320000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "1a20947e51269d384a3e3e8e33c755633f158543f56881c7906d27a3df5a99b8", + "usage_multiplier": 1, + "extra_info": { + "job_id": "771f3da2-a218-456e-a0a1-f22e15410442", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonc39cfdd0_cf93_4e71_98a8_1ecdad5bdad9" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table@1724319011327" + } + ] + } + }, + { + "query": "select revenue, date_utc from [gcp-staging-2.smoke_test_db_3.base_table_2@0]", + "session_id": null, + "timestamp": "2024-08-22 10:30:14.269000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "c24065c5-8f8e-4aad-a2fa-c856f517a906", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon12a7e0e3_25d3_4121_82ba_8700fa152525" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_3", + "table_id": "base_table_2@0" + } + ] + } + }, + { + "query": "\n SELECT row_count\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id='abc';", + "session_id": null, + "timestamp": "2024-08-22 10:31:01.120000+00:00", + "user": 
"urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "81bcba1019e933b2de506faf0f6549b8602519495edc855c0a9267b8d5bb976c", + "usage_multiplier": 1, + "extra_info": { + "job_id": "66e0e6f8-a780-4755-9f65-c3b51107ef96", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon471d3729_9b83_47b1_80f9_a5a41453441e" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "SELECT AVG(amount) FROM gcp-staging.customer_demo.purchase_event", + "session_id": null, + "timestamp": "2024-08-22 10:31:01.179000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "279851a7-44ce-477a-8200-3c52dc5d9b1a", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon4db3da963edf09d1d3e6b8dddc9f22960bb56016490ef27620acc390fcf18a27" + }, + "referenced_tables": [] + } + }, + { + "query": "SELECT COUNT(*) \nFROM gcp-staging.customer_demo.purchase_event \nWHERE amount < 0", + "session_id": null, + "timestamp": "2024-08-22 10:31:01.192000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "98febd5c-417d-4d5b-91aa-58ebbccfa2c0", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon84323ce6f50f99cfc5d2e8ad640f3923743cb41a6a759f18a7fce13c7a358d28" + }, + "referenced_tables": [] + } + }, + { + "query": 
"SELECT COUNT(*) \nFROM gcp-staging.customer_demo.purchase_event \nWHERE amount < 0", + "session_id": null, + "timestamp": "2024-08-22 10:32:01.120000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "ecaf762e-2b22-4abb-9e67-ff6aee05c046", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon84323ce6f50f99cfc5d2e8ad640f3923743cb41a6a759f18a7fce13c7a358d28" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT row_count\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id='abc';", + "session_id": null, + "timestamp": "2024-08-22 10:32:01.234000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "81bcba1019e933b2de506faf0f6549b8602519495edc855c0a9267b8d5bb976c", + "usage_multiplier": 1, + "extra_info": { + "job_id": "9c99e087-c5fb-4a93-bb72-852573578dbe", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anond4824d79_f127_4f96_b17f_17f67aa2859b" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "SELECT AVG(amount) FROM gcp-staging.customer_demo.purchase_event", + "session_id": null, + "timestamp": "2024-08-22 10:32:01.252000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "ad3a2684-c680-4509-b849-3bcc4f9a7231", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": 
"_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon4db3da963edf09d1d3e6b8dddc9f22960bb56016490ef27620acc390fcf18a27" + }, + "referenced_tables": [] + } + }, + { + "query": "SELECT AVG(amount) FROM gcp-staging.customer_demo.purchase_event", + "session_id": null, + "timestamp": "2024-08-22 10:33:00.571000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "df8f9856-3809-4b55-afc1-76087191c772", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon4db3da963edf09d1d3e6b8dddc9f22960bb56016490ef27620acc390fcf18a27" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT row_count\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id='abc';", + "session_id": null, + "timestamp": "2024-08-22 10:33:01.712000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "81bcba1019e933b2de506faf0f6549b8602519495edc855c0a9267b8d5bb976c", + "usage_multiplier": 1, + "extra_info": { + "job_id": "b8f9703c-bb4e-43f1-b701-638bacf79f50", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon3bd064cd_a083_47dc_8af1_5794e160f59a" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "\n SELECT last_modified_time\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id=\"abc2023\"\n AND last_modified_time >= 1724236667305\n AND last_modified_time <= 1724323067305\n LIMIT 5\n ;", + "session_id": null, + "timestamp": "2024-08-22 10:37:48.146000+00:00", + "user": 
"urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "2c44fb03f02e7c6777499bbd1a0e15b075d39b85c2bb544223f01a9e5ce4656d", + "usage_multiplier": 1, + "extra_info": { + "job_id": "d04d3fb8-bb99-4028-b596-c951ede50ca6", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anone16d436b_d361_45f5_8b54_7f859d609acd" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "\n SELECT COUNT(*)\n FROM gcp-staging.customer_demo.purchase_event\n WHERE amount < 0\n ", + "session_id": null, + "timestamp": "2024-08-22 10:37:48.300000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "96f20c34-3956-4c47-a13b-c669fc4c7f63", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon60e011141b8e3c85fb386e55d995a8290fc3a0896c26ac6d5717f14f5f163947" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT row_count\n FROM gcp-staging.customer_demo.__TABLES__\n WHERE table_id='purchase_event';", + "session_id": null, + "timestamp": "2024-08-22 10:37:48.793000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "8aed7604ce6319236cca0b0f7d16614774821c2666f5cf6f5be2cec702026d64", + "usage_multiplier": 1, + "extra_info": { + "job_id": "6b73759c-3f08-455b-8df2-dd1bc7dc0902", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": 
"anon08572e99_180f_4e41_9323_7186634593fe" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "customer_demo", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "SELECT COUNT(*) \nFROM gcp-staging.customer_demo.purchase_event \nWHERE amount < 0", + "session_id": null, + "timestamp": "2024-08-22 10:37:49.520000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "01a6c13e-cb18-45f4-b40a-38315a2c6430", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon84323ce6f50f99cfc5d2e8ad640f3923743cb41a6a759f18a7fce13c7a358d28" + }, + "referenced_tables": [] + } + }, + { + "query": "SELECT\n (SUM(CASE WHEN amount BETWEEN 0 AND 10 THEN 1 ELSE 0 END) * 100.0) / COUNT(*) AS percentage_in_range\nFROM\n gcp-staging.customer_demo.purchase_event;", + "session_id": null, + "timestamp": "2024-08-22 10:37:49.785000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "4fad02be-37dd-4252-89f3-b4d98b674f29", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anondaaf8a98bb16f9b4fd4e7f6ada08bac86581b8e2a25e55399914c155fd8566b7" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT last_modified_time\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id=\"abc\"\n AND last_modified_time >= 1724322603618\n AND last_modified_time <= 1724323070196\n LIMIT 5\n ;", + "session_id": null, + "timestamp": "2024-08-22 10:37:50.757000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": 
"gcp-staging", + "default_schema": "_SESSION", + "query_hash": "2c44fb03f02e7c6777499bbd1a0e15b075d39b85c2bb544223f01a9e5ce4656d", + "usage_multiplier": 1, + "extra_info": { + "job_id": "55c62d69-99af-4040-bff8-e97fadd3cc90", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon10c6bd04_a9f1_4c8d_bcb1_a1b1f2f80a58" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "SELECT COUNT(*) FROM gcp-staging.customer_demo.purchase_event WHERE user_id IS NULL", + "session_id": null, + "timestamp": "2024-08-22 10:37:51.009000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "14b1ce0b-61c7-4fe9-8d9b-839ef1c3ee06", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonb22d0d582afd566e08144b196aec36622576fdf25b6e74a35e044a5b720ce191" + }, + "referenced_tables": [] + } + }, + { + "query": "SELECT COUNT(*) \nFROM gcp-staging.customer_demo.purchase_event \nWHERE amount < 0", + "session_id": null, + "timestamp": "2024-08-22 10:37:51.359000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "2e5fc864-fc67-416c-9244-448da4e82a78", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon84323ce6f50f99cfc5d2e8ad640f3923743cb41a6a759f18a7fce13c7a358d28" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT row_count\n FROM 
gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id='abc';", + "session_id": null, + "timestamp": "2024-08-22 10:37:51.944000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "81bcba1019e933b2de506faf0f6549b8602519495edc855c0a9267b8d5bb976c", + "usage_multiplier": 1, + "extra_info": { + "job_id": "68babe3d-54f6-45ff-8b0b-9da7911dae59", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anoncb8a32f3_1aa6_4da8_9d78_eeb08eb16661" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "SELECT AVG(amount) FROM gcp-staging.customer_demo.purchase_event", + "session_id": null, + "timestamp": "2024-08-22 10:37:52.220000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "b6c7bb75-35cd-4715-8773-a6abc2f300dc", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon4db3da963edf09d1d3e6b8dddc9f22960bb56016490ef27620acc390fcf18a27" + }, + "referenced_tables": [] + } + }, + { + "query": "SELECT COUNT(*) \nFROM gcp-staging.customer_demo.purchase_event \nWHERE amount < 0", + "session_id": null, + "timestamp": "2024-08-22 10:37:53.407000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "fa4425f7-1d80-42fc-8f90-97a00df580e8", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": 
"_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon84323ce6f50f99cfc5d2e8ad640f3923743cb41a6a759f18a7fce13c7a358d28" + }, + "referenced_tables": [] + } + }, + { + "query": "SELECT AVG(amount) FROM gcp-staging.customer_demo.purchase_event", + "session_id": null, + "timestamp": "2024-08-22 10:37:53.503000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "b23c1455-ddfa-4064-8323-d39aafb2694e", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon4db3da963edf09d1d3e6b8dddc9f22960bb56016490ef27620acc390fcf18a27" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT COUNT(*)\n FROM gcp-staging.customer_demo.purchase_event\n WHERE amount < 0\n ", + "session_id": null, + "timestamp": "2024-08-22 10:37:53.512000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "02810b1a-a070-4ab3-b49a-fe0be8bc99ba", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon60e011141b8e3c85fb386e55d995a8290fc3a0896c26ac6d5717f14f5f163947" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT row_count\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id='abc';", + "session_id": null, + "timestamp": "2024-08-22 10:37:53.605000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "81bcba1019e933b2de506faf0f6549b8602519495edc855c0a9267b8d5bb976c", + "usage_multiplier": 1, + "extra_info": { + "job_id": "dfa85a7f-0ff1-4e28-87a1-800574a32c61", 
+ "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon4f90bad8_7754_4a8e_a9f6_4b468ac7fb4d" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "SELECT AVG(amount) FROM gcp-staging.customer_demo.purchase_event", + "session_id": null, + "timestamp": "2024-08-22 10:37:54.799000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "7229d1dc-e6e9-4cf4-81b8-8df0152379d8", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon4db3da963edf09d1d3e6b8dddc9f22960bb56016490ef27620acc390fcf18a27" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT last_modified_time\n FROM gcp-staging.customer_demo.__TABLES__\n WHERE table_id=\"purchase_event\"\n AND last_modified_time >= 1692787100665\n AND last_modified_time <= 1724323100665\n LIMIT 5\n ;", + "session_id": null, + "timestamp": "2024-08-22 10:38:21.339000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "c4076a26606e9f6d6b9fde2855758ba67b1155b110a65f72cf1999fa1facdfa4", + "usage_multiplier": 1, + "extra_info": { + "job_id": "08be753d-4377-43a9-86ec-b6dd147b18a9", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonb802196a_56ec_43df_9453_dc27b67fc1ee" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "customer_demo", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "\n SELECT 
last_modified_time\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id=\"abc2023\"\n AND last_modified_time >= 1724236704226\n AND last_modified_time <= 1724323104226\n LIMIT 5\n ;", + "session_id": null, + "timestamp": "2024-08-22 10:38:24.871000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "2c44fb03f02e7c6777499bbd1a0e15b075d39b85c2bb544223f01a9e5ce4656d", + "usage_multiplier": 1, + "extra_info": { + "job_id": "361a3095-dfdc-4d2d-af0b-44852398caba", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon4cfceddf_4d07_43d7_8e73_61dc558b48b6" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "\n SELECT COUNT(*)\n FROM gcp-staging.customer_demo.purchase_event\n WHERE amount < 0\n ", + "session_id": null, + "timestamp": "2024-08-22 10:38:26.351000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "33f0ba19-1925-4063-bb6c-fea0d299f76b", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon60e011141b8e3c85fb386e55d995a8290fc3a0896c26ac6d5717f14f5f163947" + }, + "referenced_tables": [] + } + }, + { + "query": "SELECT COUNT(*) FROM gcp-staging.customer_demo.purchase_event WHERE user_id IS NULL", + "session_id": null, + "timestamp": "2024-08-22 10:38:27.315000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": 
"f489b9ab-aa90-4ba3-8de8-2e6e2482a15a", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonb22d0d582afd566e08144b196aec36622576fdf25b6e74a35e044a5b720ce191" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT last_modified_time\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id=\"abc2023\"\n AND last_modified_time >= 1724236707152\n AND last_modified_time <= 1724323107152\n LIMIT 5\n ;", + "session_id": null, + "timestamp": "2024-08-22 10:38:27.719000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "2c44fb03f02e7c6777499bbd1a0e15b075d39b85c2bb544223f01a9e5ce4656d", + "usage_multiplier": 1, + "extra_info": { + "job_id": "b5571b2d-602d-4d66-a6cd-77772f72656f", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonb6690e00_3f56_44ac_8ab9_03b7c3c34bbe" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "\n SELECT row_count\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id='abc';", + "session_id": null, + "timestamp": "2024-08-22 10:38:28.175000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "81bcba1019e933b2de506faf0f6549b8602519495edc855c0a9267b8d5bb976c", + "usage_multiplier": 1, + "extra_info": { + "job_id": "c768ca60-e0cb-41dc-8bab-8117d2a8dd54", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon8cc6e9e7_16fd_490b_86e4_dc8c4b60cc22" + }, + "referenced_tables": [ + { + 
"project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "\n SELECT COUNT(*)\n FROM gcp-staging.customer_demo.purchase_event\n WHERE quantity < 0\n ", + "session_id": null, + "timestamp": "2024-08-22 10:38:28.678000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "327849f9-eea0-4428-89ef-7188bafafbc0", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon32e6bd0458339a10581928d131db191eaea3e29936a3454e7946ed8a12f17e38" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT last_modified_time\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id=\"abc\"\n AND last_modified_time >= 1724323071874\n AND last_modified_time <= 1724323108983\n LIMIT 5\n ;", + "session_id": null, + "timestamp": "2024-08-22 10:38:29.577000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "2c44fb03f02e7c6777499bbd1a0e15b075d39b85c2bb544223f01a9e5ce4656d", + "usage_multiplier": 1, + "extra_info": { + "job_id": "19e77df4-43aa-4ab5-85d2-e3be1621832b", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon12e0faf9_b2b4_4b8e_acd5_5bda7dc52803" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "SELECT COUNT(*) \nFROM gcp-staging.customer_demo.purchase_event \nWHERE amount < 0", + "session_id": null, + "timestamp": "2024-08-22 10:38:31.050000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": 
"gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "4f21cc35-0ad1-451f-b087-e11e8c469a9c", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon84323ce6f50f99cfc5d2e8ad640f3923743cb41a6a759f18a7fce13c7a358d28" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT row_count\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id='abc';", + "session_id": null, + "timestamp": "2024-08-22 10:38:34.897000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "81bcba1019e933b2de506faf0f6549b8602519495edc855c0a9267b8d5bb976c", + "usage_multiplier": 1, + "extra_info": { + "job_id": "886c3952-b0b6-452e-b7f1-c45ff2c24e67", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonfe55e7f9_741d_441e_a27b_47696d24ccaf" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "SELECT COUNT(*) \nFROM gcp-staging.customer_demo.purchase_event \nWHERE amount < 0", + "session_id": null, + "timestamp": "2024-08-22 10:38:35.783000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "15855ee2-041f-4ab8-9b19-03b080fc884d", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon84323ce6f50f99cfc5d2e8ad640f3923743cb41a6a759f18a7fce13c7a358d28" + }, + "referenced_tables": [] + } + }, + { + "query": "SELECT AVG(amount) FROM 
gcp-staging.customer_demo.purchase_event", + "session_id": null, + "timestamp": "2024-08-22 10:38:37.358000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "c88d7780-92b6-4596-8a40-cfa3db95f058", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon4db3da963edf09d1d3e6b8dddc9f22960bb56016490ef27620acc390fcf18a27" + }, + "referenced_tables": [] + } + }, + { + "query": "SELECT COUNT(*) \nFROM gcp-staging.customer_demo.purchase_event \nWHERE amount < 0", + "session_id": null, + "timestamp": "2024-08-22 10:38:37.485000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "3d6d5760-9b9e-4e73-a638-17779d88233b", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon84323ce6f50f99cfc5d2e8ad640f3923743cb41a6a759f18a7fce13c7a358d28" + }, + "referenced_tables": [] + } + }, + { + "query": "SELECT AVG(amount) FROM gcp-staging.customer_demo.purchase_event", + "session_id": null, + "timestamp": "2024-08-22 10:38:38.698000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "28199aa8-c5cb-445f-939c-73bb7f49fed7", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon4db3da963edf09d1d3e6b8dddc9f22960bb56016490ef27620acc390fcf18a27" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT row_count\n 
FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id='abc';", + "session_id": null, + "timestamp": "2024-08-22 10:38:38.831000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "81bcba1019e933b2de506faf0f6549b8602519495edc855c0a9267b8d5bb976c", + "usage_multiplier": 1, + "extra_info": { + "job_id": "3733cf41-b202-4a72-822d-04298d9fff57", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon8da3cafa_0dc9_4a37_a70a_d066a541b880" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "SELECT COUNT(*) FROM gcp-staging.customer_demo.purchase_event WHERE user_id IS NULL", + "session_id": null, + "timestamp": "2024-08-22 10:38:41.103000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "9ad989d5-81f0-4a2d-baaf-97064f36dde8", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonb22d0d582afd566e08144b196aec36622576fdf25b6e74a35e044a5b720ce191" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT COUNT(*)\n FROM gcp-staging.customer_demo.purchase_event\n WHERE amount < 0\n ", + "session_id": null, + "timestamp": "2024-08-22 10:38:43.435000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "68aa5358-09cc-4d2f-a6a5-488c729fbbd3", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": 
"_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon60e011141b8e3c85fb386e55d995a8290fc3a0896c26ac6d5717f14f5f163947" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT last_modified_time\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id=\"abc2023\"\n AND last_modified_time >= 1724236723957\n AND last_modified_time <= 1724323123957\n LIMIT 5\n ;", + "session_id": null, + "timestamp": "2024-08-22 10:38:44.473000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "2c44fb03f02e7c6777499bbd1a0e15b075d39b85c2bb544223f01a9e5ce4656d", + "usage_multiplier": 1, + "extra_info": { + "job_id": "cae21ce5-5c2d-4151-9d35-bcdb799c9cdd", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon2fb5c10b_43b3_40e8_a854_03b63c1b3502" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "SELECT COUNT(*) \nFROM gcp-staging.customer_demo.purchase_event \nWHERE amount < 0", + "session_id": null, + "timestamp": "2024-08-22 10:39:00.857000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "e728c245-07a0-4b19-809b-e44d173b5135", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon84323ce6f50f99cfc5d2e8ad640f3923743cb41a6a759f18a7fce13c7a358d28" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT last_modified_time\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id=\"abc\"\n AND last_modified_time >= 1724323110374\n AND last_modified_time 
<= 1724323140076\n LIMIT 5\n ;", + "session_id": null, + "timestamp": "2024-08-22 10:39:00.970000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "2c44fb03f02e7c6777499bbd1a0e15b075d39b85c2bb544223f01a9e5ce4656d", + "usage_multiplier": 1, + "extra_info": { + "job_id": "cd4e6023-5a4c-4445-99b9-6f7aef393ff1", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anond5cc46f2_7658_4061_8aeb_b2a2dd6688a8" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "SELECT COUNT(*) FROM gcp-staging.customer_demo.purchase_event WHERE user_id IS NULL", + "session_id": null, + "timestamp": "2024-08-22 10:39:01.969000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "0787f7d7-a643-439b-9442-baa914362c87", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonb22d0d582afd566e08144b196aec36622576fdf25b6e74a35e044a5b720ce191" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT row_count\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id='abc';", + "session_id": null, + "timestamp": "2024-08-22 10:39:02.760000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "81bcba1019e933b2de506faf0f6549b8602519495edc855c0a9267b8d5bb976c", + "usage_multiplier": 1, + "extra_info": { + "job_id": "b31aacd0-b9d7-4ef6-979a-e825fb106c96", + "statement_type": "SELECT", + "destination_table": { + "project_id": 
"gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonaaf15c74_96c6_4bb6_91dc_e85b02790c73" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "SELECT AVG(amount) FROM gcp-staging.customer_demo.purchase_event", + "session_id": null, + "timestamp": "2024-08-22 10:39:02.862000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "9c823051-2be3-4c62-b393-4876ef5af734", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon4db3da963edf09d1d3e6b8dddc9f22960bb56016490ef27620acc390fcf18a27" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT COUNT(*)\n FROM gcp-staging.customer_demo.purchase_event\n WHERE amount < 0\n ", + "session_id": null, + "timestamp": "2024-08-22 10:39:03.200000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "03321ba7-908e-4acf-b858-648654da4249", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon60e011141b8e3c85fb386e55d995a8290fc3a0896c26ac6d5717f14f5f163947" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT last_modified_time\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id=\"abc2023\"\n AND last_modified_time >= 1724236746653\n AND last_modified_time <= 1724323146653\n LIMIT 5\n ;", + "session_id": null, + "timestamp": "2024-08-22 10:39:07.316000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + 
"default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "2c44fb03f02e7c6777499bbd1a0e15b075d39b85c2bb544223f01a9e5ce4656d", + "usage_multiplier": 1, + "extra_info": { + "job_id": "b7a3bcf4-e5da-4c8b-b61c-b7324e37823c", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon11845a49_abe5_4720_a257_1af0548c3a71" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "\n SELECT COUNT(*)\n FROM gcp-staging.customer_demo.purchase_event\n WHERE amount < 0\n ", + "session_id": null, + "timestamp": "2024-08-22 10:39:09.494000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "7ef7f8e0-f8c2-4a47-a6b5-e8ebe3fc69b2", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon60e011141b8e3c85fb386e55d995a8290fc3a0896c26ac6d5717f14f5f163947" + }, + "referenced_tables": [] + } + }, + { + "query": "SELECT COUNT(*) FROM gcp-staging.customer_demo.purchase_event WHERE user_id IS NULL", + "session_id": null, + "timestamp": "2024-08-22 10:39:15.149000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "cd265738-ae52-4e75-a241-682e601b7408", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonb22d0d582afd566e08144b196aec36622576fdf25b6e74a35e044a5b720ce191" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT 
last_modified_time\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id=\"abc2023\"\n AND last_modified_time >= 1724236758284\n AND last_modified_time <= 1724323158284\n LIMIT 5\n ;", + "session_id": null, + "timestamp": "2024-08-22 10:39:18.899000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "2c44fb03f02e7c6777499bbd1a0e15b075d39b85c2bb544223f01a9e5ce4656d", + "usage_multiplier": 1, + "extra_info": { + "job_id": "709d978b-c616-4ccf-b0bc-46015c7ea201", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon5e97dfab_c9b9_4fb8_bf50_acb06ce77edc" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "SELECT\n (SUM(CASE WHEN amount BETWEEN 0 AND 10 THEN 1 ELSE 0 END) * 100.0) / COUNT(*) AS percentage_in_range\nFROM\n gcp-staging.customer_demo.purchase_event;", + "session_id": null, + "timestamp": "2024-08-22 10:39:27.170000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "5ef872bb-d3c3-4e6e-87b5-9e1838f38563", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anondaaf8a98bb16f9b4fd4e7f6ada08bac86581b8e2a25e55399914c155fd8566b7" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT COUNT(*)\n FROM gcp-staging.customer_demo.purchase_event\n WHERE amount < 0\n ", + "session_id": null, + "timestamp": "2024-08-22 10:39:28.388000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": 
null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "fb84efb3-c71d-4b80-b717-863b22a0bceb", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon60e011141b8e3c85fb386e55d995a8290fc3a0896c26ac6d5717f14f5f163947" + }, + "referenced_tables": [] + } + }, + { + "query": "SELECT COUNT(*) FROM gcp-staging.customer_demo.purchase_event WHERE user_id IS NULL", + "session_id": null, + "timestamp": "2024-08-22 10:39:30.390000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "de5f59b1-0249-421d-90eb-5103a43b49fd", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonb22d0d582afd566e08144b196aec36622576fdf25b6e74a35e044a5b720ce191" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT row_count\n FROM gcp-staging.customer_demo.__TABLES__\n WHERE table_id='purchase_event';", + "session_id": null, + "timestamp": "2024-08-22 10:39:31.842000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "8aed7604ce6319236cca0b0f7d16614774821c2666f5cf6f5be2cec702026d64", + "usage_multiplier": 1, + "extra_info": { + "job_id": "438c4b01-ec53-421a-bc5f-d61ed1edaf7f", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon2c5c1f70_9bc9_41c4_a096_93b8bda0a0e9" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "customer_demo", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "SELECT COUNT(*) \nFROM gcp-staging.customer_demo.purchase_event \nWHERE amount < 0", + "session_id": 
null, + "timestamp": "2024-08-22 10:40:01.278000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "6ee9b8d0-7767-447f-b333-55d5265d4388", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon84323ce6f50f99cfc5d2e8ad640f3923743cb41a6a759f18a7fce13c7a358d28" + }, + "referenced_tables": [] + } + }, + { + "query": "SELECT AVG(amount) FROM gcp-staging.customer_demo.purchase_event", + "session_id": null, + "timestamp": "2024-08-22 10:40:01.332000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "b2ffdb5b-52cd-44b9-a2d9-38067e9cd9a4", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon4db3da963edf09d1d3e6b8dddc9f22960bb56016490ef27620acc390fcf18a27" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT row_count\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id='abc';", + "session_id": null, + "timestamp": "2024-08-22 10:40:01.488000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "81bcba1019e933b2de506faf0f6549b8602519495edc855c0a9267b8d5bb976c", + "usage_multiplier": 1, + "extra_info": { + "job_id": "bd6599fe-13b0-4fe8-8981-c307ccd4448d", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon2072d2c5_e133_4b5a_8945_5254588340a3" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": 
"dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "\n SELECT last_modified_time\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id=\"abc2023\"\n AND last_modified_time >= 1724236803602\n AND last_modified_time <= 1724323203602\n LIMIT 5\n ;", + "session_id": null, + "timestamp": "2024-08-22 10:40:04.214000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "2c44fb03f02e7c6777499bbd1a0e15b075d39b85c2bb544223f01a9e5ce4656d", + "usage_multiplier": 1, + "extra_info": { + "job_id": "9c6f7727-ca89-4ad3-b29f-f4cc21b195e2", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anonb0ef55ef_0e63_4138_b8c0_49b113c4ea78" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "\n SELECT last_modified_time\n FROM gcp-staging.customer_demo.__TABLES__\n WHERE table_id=\"purchase_event\"\n AND last_modified_time >= 1692787205076\n AND last_modified_time <= 1724323205076\n LIMIT 5\n ;", + "session_id": null, + "timestamp": "2024-08-22 10:40:05.684000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "c4076a26606e9f6d6b9fde2855758ba67b1155b110a65f72cf1999fa1facdfa4", + "usage_multiplier": 1, + "extra_info": { + "job_id": "12ffdab3-f7ce-4260-9437-80aacfb42e08", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon2ba3f5b8_331b_4fed_9985_0559a69e5bf5" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "customer_demo", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "SELECT COUNT(*) 
\nFROM gcp-staging.customer_demo.purchase_event \nWHERE amount < 0", + "session_id": null, + "timestamp": "2024-08-22 10:41:00.943000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "31c7e5fd-c5d3-477b-89b8-74c57c1b0fb9", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon84323ce6f50f99cfc5d2e8ad640f3923743cb41a6a759f18a7fce13c7a358d28" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT row_count\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id='abc';", + "session_id": null, + "timestamp": "2024-08-22 10:41:00.951000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "81bcba1019e933b2de506faf0f6549b8602519495edc855c0a9267b8d5bb976c", + "usage_multiplier": 1, + "extra_info": { + "job_id": "cfb45092-68bb-4be3-8111-dc9533c9bbe2", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon08499120_ac59_47f0_9d7a_5d3b9e2d95c2" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "SELECT AVG(amount) FROM gcp-staging.customer_demo.purchase_event", + "session_id": null, + "timestamp": "2024-08-22 10:41:02.520000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "e6882b25-dc5e-4d49-a916-ee09d2e4bcc6", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": 
"_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon4db3da963edf09d1d3e6b8dddc9f22960bb56016490ef27620acc390fcf18a27" + }, + "referenced_tables": [] + } + }, + { + "query": "SELECT AVG(amount) FROM gcp-staging.customer_demo.purchase_event", + "session_id": null, + "timestamp": "2024-08-22 10:42:00.672000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "d916df78-aca9-4365-aa7e-32cc56929771", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon4db3da963edf09d1d3e6b8dddc9f22960bb56016490ef27620acc390fcf18a27" + }, + "referenced_tables": [] + } + }, + { + "query": "SELECT COUNT(*) \nFROM gcp-staging.customer_demo.purchase_event \nWHERE amount < 0", + "session_id": null, + "timestamp": "2024-08-22 10:42:01.938000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "08737edb-28ad-4dde-887e-c6c4054d94ef", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon84323ce6f50f99cfc5d2e8ad640f3923743cb41a6a759f18a7fce13c7a358d28" + }, + "referenced_tables": [] + } + }, + { + "query": "\n SELECT last_modified_time\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id=\"abc\"\n AND last_modified_time >= 1724323141731\n AND last_modified_time <= 1724323322589\n LIMIT 5\n ;", + "session_id": null, + "timestamp": "2024-08-22 10:42:03.231000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": 
"2c44fb03f02e7c6777499bbd1a0e15b075d39b85c2bb544223f01a9e5ce4656d", + "usage_multiplier": 1, + "extra_info": { + "job_id": "914aafd8-5543-42d5-b21b-92df47c28268", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon4cd2e7fe_9f6f_4a6c_9c92_ff521c7caa99" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "\n SELECT row_count\n FROM gcp-staging-2.dataset_as_sharded_table.__TABLES__\n WHERE table_id='abc';", + "session_id": null, + "timestamp": "2024-08-22 10:42:04.926000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test", + "default_db": "gcp-staging", + "default_schema": "_SESSION", + "query_hash": "81bcba1019e933b2de506faf0f6549b8602519495edc855c0a9267b8d5bb976c", + "usage_multiplier": 1, + "extra_info": { + "job_id": "177a3d11-f04f-4819-9b67-555b3190af6a", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging", + "dataset_id": "_283d0dc3d7613bc3d88b6692661164402822056f", + "table_id": "anon2f5817f8_15ba_4df7_83c9_412d70c62fce" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "dataset_as_sharded_table", + "table_id": "__TABLES__" + } + ] + } + }, + { + "query": "select revenue from gcp-staging.smoke_test_db.base_table", + "session_id": null, + "timestamp": "2024-08-22 10:28:37.386000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "default_db": "gcp-staging-2", + "default_schema": "_SESSION", + "query_hash": "39323692dfca0074ed7ac63c27830a64ce68742109e09033015b854d46be7dde", + "usage_multiplier": 1, + "extra_info": { + "job_id": "1b9741fb-0693-4371-acd0-e7c77a775aa0", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "_9b92e769ee331eccc38faf408e659980e15ac5cb", + "table_id": 
"anona358818e_bfc6_4373_b610_778f49467cef" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table" + }, + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "materialized_view_from_table" + } + ] + } + }, + { + "query": "select revenue from gcp-staging.smoke_test_db.partition_test", + "session_id": null, + "timestamp": "2024-08-22 10:28:41.153000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "default_db": "gcp-staging-2", + "default_schema": "_SESSION", + "query_hash": "6e1d1e89de700713d87b8de79d43da4d437ca0d98a5589385104ee7b4142f922", + "usage_multiplier": 1, + "extra_info": { + "job_id": "c2cec833-2d60-42b0-994b-604ec92a88ae", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "_9b92e769ee331eccc38faf408e659980e15ac5cb", + "table_id": "anonbcd279fa_10aa_422d_a999_2383b3e8c932" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "partition_test" + } + ] + } + }, + { + "query": "select revenue from gcp-staging.smoke_test_db.lineage_from_tmp_table", + "session_id": null, + "timestamp": "2024-08-22 10:28:44.291000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "default_db": "gcp-staging-2", + "default_schema": "_SESSION", + "query_hash": "8d3f968d2fbae03772fedb7e2dd43c2ba66ae90aa35e4824d40fd66997d51681", + "usage_multiplier": 1, + "extra_info": { + "job_id": "ac0b567c-b711-489e-93ea-1a4696055163", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "_9b92e769ee331eccc38faf408e659980e15ac5cb", + "table_id": "anonc046b2fa_61f1_48da_9df1_5e517b6b1901" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "lineage_from_tmp_table" + } + ] + } + }, + { + "query": "select revenue from 
gcp-staging.smoke_test_db.lineage_from_base", + "session_id": null, + "timestamp": "2024-08-22 10:28:47.479000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "default_db": "gcp-staging-2", + "default_schema": "_SESSION", + "query_hash": "1f52fe6a0e09ab08bb65356947420bc85f0cc1bb38557fcdc7e33a049e6b113f", + "usage_multiplier": 1, + "extra_info": { + "job_id": "8ea9e3a5-4ef2-4665-8ad2-c5aeaf4ce063", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "_9b92e769ee331eccc38faf408e659980e15ac5cb", + "table_id": "anone04f0dcf_9a17_4be8_92c4_92b31811d167" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "lineage_from_base" + } + ] + } + }, + { + "query": "select revenue from gcp-staging.smoke_test_db.view_from_table", + "session_id": null, + "timestamp": "2024-08-22 10:28:50.411000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "default_db": "gcp-staging-2", + "default_schema": "_SESSION", + "query_hash": "d0726cb4d89262a037a6d3208dfa36439952b1fa95bcb6ea35fa6c46e46d5b5a", + "usage_multiplier": 1, + "extra_info": { + "job_id": "1bf06f7f-1d21-413e-b26a-ca4569586cb4", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "_9b92e769ee331eccc38faf408e659980e15ac5cb", + "table_id": "anon59fa6fb5_e019_4328_8f23_01f38f7b5e3a" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "materialized_view_from_table" + }, + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table" + } + ] + } + }, + { + "query": "select revenue from gcp-staging.smoke_test_db.view_from_view_on_table", + "session_id": null, + "timestamp": "2024-08-22 10:28:53.604000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "default_db": "gcp-staging-2", + "default_schema": "_SESSION", + "query_hash": 
"5a8d7c9836a318c4be63fcc8f38e460b12c2b070a5daa98dc2c004ff53b29c87", + "usage_multiplier": 1, + "extra_info": { + "job_id": "3e577d7c-7ccc-4be8-a19e-e1399fabee69", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "_9b92e769ee331eccc38faf408e659980e15ac5cb", + "table_id": "anon6d70c7aa_a77d_402a_96e0_414de3d1c7fb" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "materialized_view_from_table" + }, + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table" + } + ] + } + }, + { + "query": "select revenue from gcp-staging.smoke_test_db.table_from_view", + "session_id": null, + "timestamp": "2024-08-22 10:28:56.837000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "default_db": "gcp-staging-2", + "default_schema": "_SESSION", + "query_hash": "827d388f24f2f2a7961260884a76d0e6449ba200efd12d2500ae95350c63c53f", + "usage_multiplier": 1, + "extra_info": { + "job_id": "35d712b5-ce79-43db-98bb-52b6363a5ef3", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "_9b92e769ee331eccc38faf408e659980e15ac5cb", + "table_id": "anonb6ff5e62_e4b7_4f9a_8fa7_7e86d3a6a7e4" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "table_from_view" + } + ] + } + }, + { + "query": "select revenue from gcp-staging.smoke_test_db.table_from_view_and_table", + "session_id": null, + "timestamp": "2024-08-22 10:28:59.969000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "default_db": "gcp-staging-2", + "default_schema": "_SESSION", + "query_hash": "e0f5980f7d9741829dc2b3183b1ea065e9f5f6e8cb7650c10eeb3398c10d4d19", + "usage_multiplier": 1, + "extra_info": { + "job_id": "cb805273-1313-45d0-af6d-fa6d9b3d8b56", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": 
"_9b92e769ee331eccc38faf408e659980e15ac5cb", + "table_id": "anon50fed9a7_008c_4405_b22f_f93efccda484" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "table_from_view_and_table" + } + ] + } + }, + { + "query": "select revenue from gcp-staging.smoke_test_db.materialized_view_from_table", + "session_id": null, + "timestamp": "2024-08-22 10:29:03.069000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "default_db": "gcp-staging-2", + "default_schema": "_SESSION", + "query_hash": "59fa2d6828221cc2962cc609935bc4c94a6f3a846e94439f7ff3374bcc439b4e", + "usage_multiplier": 1, + "extra_info": { + "job_id": "32825ee8-28b7-4670-9296-1b67d8978bf6", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "_9b92e769ee331eccc38faf408e659980e15ac5cb", + "table_id": "anona7c9b810_373e_4102_8b6c_74622dbddb92" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "materialized_view_from_table" + }, + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table" + } + ] + } + }, + { + "query": "select revenue from gcp-staging.smoke_test_db.view_from_multiple_tables", + "session_id": null, + "timestamp": "2024-08-22 10:29:06.639000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "default_db": "gcp-staging-2", + "default_schema": "_SESSION", + "query_hash": "d6239d9f32f5ddd664255bb3629824c5a099e9a0213a484174fa6af613a3bc16", + "usage_multiplier": 1, + "extra_info": { + "job_id": "1d8c6501-d09c-4540-b62c-0960c6ce4461", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "_9b92e769ee331eccc38faf408e659980e15ac5cb", + "table_id": "anone4cf1322_c0a3_4178_85fe_ed3d25a86cf5" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table" + }, + { + 
"project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "lineage_from_base" + }, + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db_2", + "table_id": "table_from_other_db" + }, + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "materialized_view_from_table" + } + ] + } + }, + { + "query": "select revenue from gcp-staging.smoke_test_db_2.table_from_other_db", + "session_id": null, + "timestamp": "2024-08-22 10:29:09.851000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "default_db": "gcp-staging-2", + "default_schema": "_SESSION", + "query_hash": "a5c10c32c0b05d5248e6d337d9fe607998473778b0680e392f7a93ecc7869b1b", + "usage_multiplier": 1, + "extra_info": { + "job_id": "95c21bb0-9a17-4dde-8198-bde46c52d98e", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "_9b92e769ee331eccc38faf408e659980e15ac5cb", + "table_id": "anone376867c_5467_4841_803a_4a547f3ba08b" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db_2", + "table_id": "table_from_other_db" + } + ] + } + }, + { + "query": "select revenue from gcp-staging-2.smoke_test_db_3.base_table_2", + "session_id": null, + "timestamp": "2024-08-22 10:29:12.655000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "default_db": "gcp-staging-2", + "default_schema": "_SESSION", + "query_hash": "180575b59ab828f1bb56a49c25da862144fdf1a484773533a5363df180f81919", + "usage_multiplier": 1, + "extra_info": { + "job_id": "cf9d8407-934c-4b98-8fcf-be51bd5fc4b2", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "_9b92e769ee331eccc38faf408e659980e15ac5cb", + "table_id": "anone0788f93_2e16_4300_b94c_f86eee9758c2" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_3", + "table_id": "base_table_2" + } + ] + } + }, + { + "query": "select revenue from 
gcp-staging.smoke_test_db.table_from_another_project", + "session_id": null, + "timestamp": "2024-08-22 10:29:15.601000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "default_db": "gcp-staging-2", + "default_schema": "_SESSION", + "query_hash": "69c8f6b58948d2bafceffb46708bb041e1b46830a052530567fc054c752d43b1", + "usage_multiplier": 1, + "extra_info": { + "job_id": "72249ae6-3035-4698-ad2f-54f0f6f58976", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "_9b92e769ee331eccc38faf408e659980e15ac5cb", + "table_id": "anon2608c050_7916_471e_8ba3_c9e004e22daa" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "table_from_another_project" + } + ] + } + }, + { + "query": "select revenue from gcp-staging-2.smoke_test_db_4.sharded_table1_20230101", + "session_id": null, + "timestamp": "2024-08-22 10:29:18.514000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "default_db": "gcp-staging-2", + "default_schema": "_SESSION", + "query_hash": "cd3ceb2068c7b0691a03fb47183df1d55b07bd102781ede467de375a68736a51", + "usage_multiplier": 1, + "extra_info": { + "job_id": "7ad97142-5030-4723-ae8c-e48f0dc74d34", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "_9b92e769ee331eccc38faf408e659980e15ac5cb", + "table_id": "anon118ffa8c_5d72_4f8d_b022_b7d7d5e679e2" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_4", + "table_id": "sharded_table1_20230101" + } + ] + } + }, + { + "query": "select revenue from gcp-staging-2.smoke_test_db_4.sharded_table1_20230201", + "session_id": null, + "timestamp": "2024-08-22 10:29:21.765000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "default_db": "gcp-staging-2", + "default_schema": "_SESSION", + "query_hash": "a3fe1801fa4d68d8413b4b90c4704bcb6c629d8976c84355b7875b10655c9582", + 
"usage_multiplier": 1, + "extra_info": { + "job_id": "1a903e95-526f-4f2a-b7bb-cf85559557cd", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "_9b92e769ee331eccc38faf408e659980e15ac5cb", + "table_id": "anon5e1130d1_930c_4a12_9c21_7c00388ee11f" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_4", + "table_id": "sharded_table1_20230201" + } + ] + } + }, + { + "query": "select revenue from `gcp-staging-2.smoke_test_db_4.sharded_table1_*`", + "session_id": null, + "timestamp": "2024-08-22 10:29:25.044000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "default_db": "gcp-staging-2", + "default_schema": "_SESSION", + "query_hash": "8e8d1f83b2d56f70fff7ea7879290c0b26663d185a80a872902ebf2da23bc3b1", + "usage_multiplier": 1, + "extra_info": { + "job_id": "1489ced3-a8db-42f0-b6c1-0b23c2e042e7", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "_9b92e769ee331eccc38faf408e659980e15ac5cb", + "table_id": "anon89622e5a_c0f9_469b_9a9a_2379cac465b6" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_4", + "table_id": "sharded_table1_*" + } + ] + } + }, + { + "query": "\n select revenue from gcp-staging.smoke_test_db.partition_test t\n where cast(t.date_utc as DATE) = (select max(date_utc) from gcp-staging.smoke_test_db.partition_test)\n", + "session_id": null, + "timestamp": "2024-08-22 10:29:42.590000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "default_db": "gcp-staging-2", + "default_schema": "_SESSION", + "query_hash": "9776768f462eef58c2dbc091f9548e020cf7addc1628cf83a2ea506aa81ece86", + "usage_multiplier": 1, + "extra_info": { + "job_id": "d6479b3a-694b-4271-aa3c-3be5e8b8f7c6", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "_9b92e769ee331eccc38faf408e659980e15ac5cb", + "table_id": 
"anon9717e692_a817_40f3_8f8c_d2af76a4eed9" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "partition_test" + } + ] + } + }, + { + "query": "\n select revenue from gcp-staging.smoke_test_db.partition_test t\n where cast(t.date_utc as DATE) = (select max(date_utc) from gcp-staging.smoke_test_db.partition_test)\n", + "session_id": null, + "timestamp": "2024-08-22 10:29:44.320000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "default_db": "gcp-staging-2", + "default_schema": "_SESSION", + "query_hash": "9776768f462eef58c2dbc091f9548e020cf7addc1628cf83a2ea506aa81ece86", + "usage_multiplier": 1, + "extra_info": { + "job_id": "0e5a166c-4018-4585-a341-a40585343818", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "_9b92e769ee331eccc38faf408e659980e15ac5cb", + "table_id": "anon3453f1d1_c634_4273_b472_c72fac89f718" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "partition_test" + } + ] + } + }, + { + "query": "\n select revenue from gcp-staging.smoke_test_db.partition_test t\n where cast(t.date_utc as DATE) = (select max(date_utc) from gcp-staging.smoke_test_db.partition_test)\n", + "session_id": null, + "timestamp": "2024-08-22 10:29:45.961000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "default_db": "gcp-staging-2", + "default_schema": "_SESSION", + "query_hash": "9776768f462eef58c2dbc091f9548e020cf7addc1628cf83a2ea506aa81ece86", + "usage_multiplier": 1, + "extra_info": { + "job_id": "a36c8702-c796-4173-a19a-6b072c5064aa", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "_9b92e769ee331eccc38faf408e659980e15ac5cb", + "table_id": "anond9168686_8f61_47f7_9546_d97b9daf573b" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "partition_test" + 
} + ] + } + }, + { + "query": "select key from gcp-staging.smoke_test_db.usage_test", + "session_id": null, + "timestamp": "2024-08-22 10:29:49.238000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "default_db": "gcp-staging-2", + "default_schema": "_SESSION", + "query_hash": "dbd21e2e4c5f9fe397c76585acdf3b84f72525d0d951d79e8057b05dc8d7c49d", + "usage_multiplier": 1, + "extra_info": { + "job_id": "2360b8ad-4e69-45ac-9294-a528634219b3", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "_9b92e769ee331eccc38faf408e659980e15ac5cb", + "table_id": "anon0bff271e_f2e5_4dd8_ba72_3996ab9a4fbf" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "usage_test" + } + ] + } + }, + { + "query": "select key from gcp-staging.smoke_test_db.usage_test", + "session_id": null, + "timestamp": "2024-08-22 10:29:52.280000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "default_db": "gcp-staging-2", + "default_schema": "_SESSION", + "query_hash": "dbd21e2e4c5f9fe397c76585acdf3b84f72525d0d951d79e8057b05dc8d7c49d", + "usage_multiplier": 1, + "extra_info": { + "job_id": "68464689-cf31-4cee-87cc-9cf96db96285", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "_9b92e769ee331eccc38faf408e659980e15ac5cb", + "table_id": "anonced1fae9_3dcb_40ba_a69c_ae6d75133e97" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "usage_test" + } + ] + } + }, + { + "query": "select key from gcp-staging.smoke_test_db.usage_test", + "session_id": null, + "timestamp": "2024-08-22 10:29:55.586000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "default_db": "gcp-staging-2", + "default_schema": "_SESSION", + "query_hash": "dbd21e2e4c5f9fe397c76585acdf3b84f72525d0d951d79e8057b05dc8d7c49d", + "usage_multiplier": 1, + "extra_info": { + "job_id": 
"277b376a-186b-40aa-a64b-34940ed9d5e1", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "_9b92e769ee331eccc38faf408e659980e15ac5cb", + "table_id": "anon27185a25_2daf_481f_8c43_20fbac91241b" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "usage_test" + } + ] + } + }, + { + "query": "select * from gcp-staging.smoke_test_db.usage_test", + "session_id": null, + "timestamp": "2024-08-22 10:29:58.466000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "default_db": "gcp-staging-2", + "default_schema": "_SESSION", + "query_hash": "553f645d24478a4dd699b8deba3a900f3a8907f53105a104f543d08aaef22930", + "usage_multiplier": 1, + "extra_info": { + "job_id": "f7010dd4-4fd8-42eb-a973-844a161d73ac", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "_9b92e769ee331eccc38faf408e659980e15ac5cb", + "table_id": "anon1fd96c89_d1e8_44f4_b714_bdf425f3920a" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "usage_test" + } + ] + } + }, + { + "query": "select * from gcp-staging.smoke_test_db.usage_test", + "session_id": null, + "timestamp": "2024-08-22 10:30:01.431000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "default_db": "gcp-staging-2", + "default_schema": "_SESSION", + "query_hash": "553f645d24478a4dd699b8deba3a900f3a8907f53105a104f543d08aaef22930", + "usage_multiplier": 1, + "extra_info": { + "job_id": "8bd62522-3803-431d-8970-caf0c9f64726", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "_9b92e769ee331eccc38faf408e659980e15ac5cb", + "table_id": "anon026ba4a8_61fe_40e0_bbac_c2745e7e989b" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "usage_test" + } + ] + } + }, + { + "query": "select revenue from 
gcp-staging.smoke_test_db.base_table FOR SYSTEM_TIME AS OF TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 HOUR)", + "session_id": null, + "timestamp": "2024-08-22 10:30:12.787000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "default_db": "gcp-staging-2", + "default_schema": "_SESSION", + "query_hash": "e55a9654f8c04ebea7d9b153b53cd5e0763b9a15fd68b219eb07719ee0214726", + "usage_multiplier": 1, + "extra_info": { + "job_id": "51df8223-866e-4322-9c1c-a91c808a37b0", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "_9b92e769ee331eccc38faf408e659980e15ac5cb", + "table_id": "anoncbdc907d_def4_4170_a5e2_537bc2cadcfc" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging", + "dataset_id": "smoke_test_db", + "table_id": "base_table@1724319012792" + } + ] + } + }, + { + "query": "select revenue from [gcp-staging-2.smoke_test_db_3.base_table_2@0]", + "session_id": null, + "timestamp": "2024-08-22 10:30:15.755000+00:00", + "user": "urn:li:corpuser:dh-bigquery-smoke-test-2", + "default_db": "gcp-staging-2", + "default_schema": "_SESSION", + "query_hash": null, + "usage_multiplier": 1, + "extra_info": { + "job_id": "58a71429-22f4-4251-a4a0-f5370f1caadd", + "statement_type": "SELECT", + "destination_table": { + "project_id": "gcp-staging-2", + "dataset_id": "_9b92e769ee331eccc38faf408e659980e15ac5cb", + "table_id": "anon5be4a9ee_7236_4a08_8e69_62eee0161075" + }, + "referenced_tables": [ + { + "project_id": "gcp-staging-2", + "dataset_id": "smoke_test_db_3", + "table_id": "base_table_2@0" + } + ] + } + } +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py b/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py index 762c73d2a55c60..dff7f18db6135c 100644 --- a/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py +++ b/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py @@ -15,6 +15,7 @@ from 
datahub.ingestion.source.bigquery_v2.bigquery_schema import ( BigqueryColumn, BigqueryDataset, + BigqueryProject, BigQuerySchemaApi, BigqueryTable, ) @@ -39,6 +40,33 @@ def random_email(): ) +def recipe(mcp_output_path: str, override: dict = {}) -> dict: + return { + "source": { + "type": "bigquery", + "config": { + "project_ids": ["project-id-1"], + "include_usage_statistics": False, + "include_table_lineage": False, + "include_data_platform_instance": True, + "classification": ClassificationConfig( + enabled=True, + classifiers=[ + DynamicTypedClassifierConfig( + type="datahub", + config=DataHubClassifierConfig( + minimum_values_threshold=1, + ), + ) + ], + max_workers=1, + ).dict(), + }, + }, + "sink": {"type": "file", "config": {"filename": mcp_output_path}}, + } + + @freeze_time(FROZEN_TIME) @patch.object(BigQuerySchemaApi, "get_tables_for_dataset") @patch.object(BigQuerySchemaGenerator, "get_core_table_details") @@ -47,9 +75,11 @@ def random_email(): @patch.object(BigQueryDataReader, "get_sample_data_for_table") @patch("google.cloud.bigquery.Client") @patch("google.cloud.datacatalog_v1.PolicyTagManagerClient") +@patch("google.cloud.resourcemanager_v3.ProjectsClient") def test_bigquery_v2_ingest( client, policy_tag_manager_client, + projects_client, get_sample_data_for_table, get_columns_for_dataset, get_datasets_for_project_id, @@ -111,33 +141,105 @@ def test_bigquery_v2_ingest( ) get_tables_for_dataset.return_value = iter([bigquery_table]) - source_config_dict: Dict[str, Any] = { - "project_ids": ["project-id-1"], - "include_usage_statistics": False, - "include_table_lineage": False, - "include_data_platform_instance": True, - "classification": ClassificationConfig( - enabled=True, - classifiers=[ - DynamicTypedClassifierConfig( - type="datahub", - config=DataHubClassifierConfig( - minimum_values_threshold=1, - ), - ) - ], - max_workers=1, - ).dict(), - } + pipeline_config_dict: Dict[str, Any] = recipe(mcp_output_path=mcp_output_path) - pipeline_config_dict: 
Dict[str, Any] = { - "source": { - "type": "bigquery", - "config": source_config_dict, - }, - "sink": {"type": "file", "config": {"filename": mcp_output_path}}, + run_and_get_pipeline(pipeline_config_dict) + + mce_helpers.check_golden_file( + pytestconfig, + output_path=mcp_output_path, + golden_path=mcp_golden_path, + ) + + +@freeze_time(FROZEN_TIME) +@patch.object(BigQuerySchemaApi, attribute="get_projects_with_labels") +@patch.object(BigQuerySchemaApi, "get_tables_for_dataset") +@patch.object(BigQuerySchemaGenerator, "get_core_table_details") +@patch.object(BigQuerySchemaApi, "get_datasets_for_project_id") +@patch.object(BigQuerySchemaApi, "get_columns_for_dataset") +@patch.object(BigQueryDataReader, "get_sample_data_for_table") +@patch("google.cloud.bigquery.Client") +@patch("google.cloud.datacatalog_v1.PolicyTagManagerClient") +@patch("google.cloud.resourcemanager_v3.ProjectsClient") +def test_bigquery_v2_project_labels_ingest( + client, + policy_tag_manager_client, + projects_client, + get_sample_data_for_table, + get_columns_for_dataset, + get_datasets_for_project_id, + get_core_table_details, + get_tables_for_dataset, + get_projects_with_labels, + pytestconfig, + tmp_path, +): + test_resources_dir = pytestconfig.rootpath / "tests/integration/bigquery_v2" + mcp_golden_path = f"{test_resources_dir}/bigquery_project_label_mcp_golden.json" + mcp_output_path = "{}/{}".format(tmp_path, "bigquery_project_label_mcp_output.json") + + get_datasets_for_project_id.return_value = [ + BigqueryDataset(name="bigquery-dataset-1") + ] + + get_projects_with_labels.return_value = [ + BigqueryProject(id="dev", name="development") + ] + + table_list_item = TableListItem( + {"tableReference": {"projectId": "", "datasetId": "", "tableId": ""}} + ) + table_name = "table-1" + get_core_table_details.return_value = {table_name: table_list_item} + get_columns_for_dataset.return_value = { + table_name: [ + BigqueryColumn( + name="age", + ordinal_position=1, + is_nullable=False, + 
field_path="col_1", + data_type="INT", + comment="comment", + is_partition_column=False, + cluster_column_position=None, + policy_tags=["Test Policy Tag"], + ), + BigqueryColumn( + name="email", + ordinal_position=1, + is_nullable=False, + field_path="col_2", + data_type="STRING", + comment="comment", + is_partition_column=False, + cluster_column_position=None, + ), + ] + } + get_sample_data_for_table.return_value = { + "age": [random.randint(1, 80) for i in range(20)], + "email": [random_email() for i in range(20)], } + bigquery_table = BigqueryTable( + name=table_name, + comment=None, + created=None, + last_altered=None, + size_in_bytes=None, + rows_count=None, + ) + get_tables_for_dataset.return_value = iter([bigquery_table]) + + pipeline_config_dict: Dict[str, Any] = recipe(mcp_output_path=mcp_output_path) + + del pipeline_config_dict["source"]["config"]["project_ids"] + + pipeline_config_dict["source"]["config"]["project_labels"] = [ + "environment:development" + ] + run_and_get_pipeline(pipeline_config_dict) mce_helpers.check_golden_file( diff --git a/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery_queries.py b/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery_queries.py new file mode 100644 index 00000000000000..fb51aac9fa246d --- /dev/null +++ b/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery_queries.py @@ -0,0 +1,77 @@ +import json +from datetime import datetime +from pathlib import Path +from unittest.mock import patch + +import pytest +from freezegun import freeze_time + +from datahub.ingestion.source.usage.usage_common import BaseUsageConfig +from datahub.sql_parsing.sql_parsing_aggregator import ObservedQuery +from datahub.utilities.file_backed_collections import ConnectionWrapper, FileBackedList +from tests.test_helpers import mce_helpers +from tests.test_helpers.state_helpers import run_and_get_pipeline + +FROZEN_TIME = "2024-08-19 07:00:00" + + +def _generate_queries_cached_file(tmp_path: Path, 
queries_json_path: Path) -> None: + # We choose to generate Cached audit log (FileBackedList backed by sqlite) at runtime + # instead of using pre-existing sqlite file here as default serializer for FileBackedList + # uses pickle which may not work well across python versions. + + shared_connection = ConnectionWrapper(tmp_path / "audit_log.sqlite") + query_cache: FileBackedList[ObservedQuery] = FileBackedList(shared_connection) + with open(queries_json_path, "r") as f: + queries = json.load(f) + assert isinstance(queries, list) + for query in queries: + query["timestamp"] = datetime.fromisoformat(query["timestamp"]) + query_cache.append(ObservedQuery(**query)) + + query_cache.close() + shared_connection.close() + + +@freeze_time(FROZEN_TIME) +@patch("google.cloud.bigquery.Client") +@patch("google.cloud.resourcemanager_v3.ProjectsClient") +def test_queries_ingestion(project_client, client, pytestconfig, monkeypatch, tmp_path): + + test_resources_dir = pytestconfig.rootpath / "tests/integration/bigquery_v2" + mcp_golden_path = f"{test_resources_dir}/bigquery_queries_mcps_golden.json" + mcp_output_path = "bigquery_queries_mcps.json" + + try: + # query_log.json is originally created by using queries dump generated by + # acryl bigquery connector smoke test and using `datahub check extract-sql-agg-log` + # command with tablename="data" to convert cached audit log to queries json followed by + # a simple `acryl-staging`->`gcp-staging` replacement. + + _generate_queries_cached_file(tmp_path, test_resources_dir / "query_log.json") + except Exception as e: + pytest.fail(f"Failed to generate queries sqlite cache: {e}") + + pipeline_config_dict: dict = { + "source": { + "type": "bigquery-queries", + "config": { + "project_ids": ["gcp-staging", "gcp-staging-2"], + "local_temp_path": tmp_path, + }, + }, + "sink": {"type": "file", "config": {"filename": mcp_output_path}}, + } + + # This is hacky to pick all queries instead of any 10. 
+ # Should be easy to remove once top_n_queries is supported in queries config + monkeypatch.setattr(BaseUsageConfig.__fields__["top_n_queries"], "default", 20) + + pipeline = run_and_get_pipeline(pipeline_config_dict) + pipeline.pretty_print_summary() + + mce_helpers.check_golden_file( + pytestconfig, + output_path=mcp_output_path, + golden_path=mcp_golden_path, + ) diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json index 81754fd6cbcaca..d2c71659706818 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json @@ -638,8 +638,8 @@ "json": { "timestampMillis": 1663355198240, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED" } @@ -659,8 +659,8 @@ "json": { "timestampMillis": 1663355198242, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -1097,8 +1097,8 @@ "json": { "timestampMillis": 1663355198240, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED" } @@ -1118,8 +1118,8 @@ "json": { "timestampMillis": 1663355198242, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -1420,8 +1420,8 @@ "json": { "timestampMillis": 1663355198240, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED" } @@ -1441,8 +1441,8 @@ 
"json": { "timestampMillis": 1663355198242, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -1944,8 +1944,8 @@ "json": { "timestampMillis": 1663355198240, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED" } @@ -1965,8 +1965,8 @@ "json": { "timestampMillis": 1663355198242, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -1982,6 +1982,2201 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.actor,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.actor,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.actor,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "model_maturity": "in dev", + "owner": "@alice", + "some_other_property": "test 1", + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + 
"language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.actor", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "actor", + "description": "description for actor table from dbt", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:@alice", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.actor", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1581759273000, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "actor_id", + "nullable": false, + "description": "description for actor_id column from dbt", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "first_name", + "nullable": false, + "description": "dbt comment: Actors column \u2013 from postgres\n\ndbt model description: description for first_name from dbt", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_name", + "nullable": false, + "description": "description for last_name from dbt", + "type": { + "type": { + 
"com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_update", + "nullable": false, + "description": "description for last_update from dbt", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.actor,PROD)", + "type": "COPY" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": 
"models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.address", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "address", + "description": "a user's address", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.address", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1581759930000, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "address", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "address2", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "address_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "city_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "district", + "nullable": false, + "type": { + "type": { + 
"com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_update", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "phone", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "postal_code", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.address,PROD)", + "type": "COPY" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.category,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.category,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + 
"lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.category,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.category", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "category", + "description": "a user's category", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.category", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1581759987000, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "category_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_update", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": 
"text", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.category,PROD)", + "type": "COPY" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.city", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": 
"1.7.3" + }, + "name": "city", + "description": "", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.city", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1581759925000, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "city", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "city_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "country_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_update", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.city,PROD)", + "type": "COPY" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.country,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.country,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.country,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "model_maturity": "in prod", + "owner": "@bob", + "some_other_property": "test 2", + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.country", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "country", + "description": "", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:@bob", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + 
"schemaName": "source.sample_dbt.pagila.country", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1581759840000, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "country", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "country_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_update", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.country,PROD)", + "type": "COPY" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.customer", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "customer", + "description": "description for customer table from dbt", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.customer", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1581760640000, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "active", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "activebool", + "nullable": false, + 
"type": { + "type": { + "com.linkedin.pegasus2avro.schema.BooleanType": {} + } + }, + "nativeDataType": "boolean", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "address_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "create_date", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "date", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "email", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "first_name", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_name", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_update", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "store_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + 
"upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD)", + "type": "COPY" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.payment_p2020_01", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "payment_p2020_01", + "description": "", + "tags": [] + } + }, + 
{ + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.payment_p2020_01", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1580505371997, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "amount", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "numeric(5,2)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_date", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "rental_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "staff_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + 
"time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_01,PROD)", + "type": "COPY" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "an_array_property": "['alpha', 'beta', 'charlie']", + "model_maturity": "in prod", + "owner": "@charles", + "some_other_property": "test 3", + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.payment_p2020_02", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": 
"https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "payment_p2020_02", + "description": "", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:@charles", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.payment_p2020_02", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1582319845997, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "amount", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "numeric(5,2)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_date", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "rental_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + 
"nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "staff_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_02,PROD)", + "type": "COPY" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": 
"source.sample_dbt.pagila.payment_p2020_03", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "payment_p2020_03", + "description": "", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.payment_p2020_03", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1584998318997, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "amount", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "numeric(5,2)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_date", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "rental_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": 
"integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "staff_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_03,PROD)", + "type": "COPY" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": 
"source.sample_dbt.pagila.payment_p2020_04", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "payment_p2020_04", + "description": "", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.payment_p2020_04", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1588287228997, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "amount", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "numeric(5,2)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_date", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "rental_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": 
"integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "staff_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_04,PROD)", + "type": "COPY" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": 
"source.sample_dbt.pagila.payment_p2020_05", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "payment_p2020_05", + "description": "a payment", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.payment_p2020_05", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1589460269997, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "amount", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "numeric(5,2)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_date", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "rental_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + 
"nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "staff_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_05,PROD)", + "type": "COPY" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": 
"source.sample_dbt.pagila.payment_p2020_06", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "payment_p2020_06", + "description": "", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.payment_p2020_06", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": -62135596800000, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "amount", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "numeric(5,2)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_date", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "rental_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + 
"nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "staff_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_06,PROD)", + "type": "COPY" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an-aliased-view-for-monthly-billing,PROD)", @@ -2344,8 +4539,8 @@ }, "assertionUrn": "urn:li:assertion:ba2c6ba830d407d539452f4cf46c92a6", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2426,8 +4621,8 @@ }, "assertionUrn": "urn:li:assertion:10f2a119dedcaab43afc47ff13d9cb5b", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2507,8 +4702,8 @@ }, "assertionUrn": "urn:li:assertion:c456eccf6440c6e3388c584689a74d91", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2588,8 +4783,8 @@ }, "assertionUrn": "urn:li:assertion:f812b73477d81e6af283d918cb59e7bf", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2678,8 +4873,8 @@ }, "assertionUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", "partitionSpec": { - "type": 
"FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2752,8 +4947,8 @@ }, "assertionUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2833,8 +5028,8 @@ }, "assertionUrn": "urn:li:assertion:f6a1fde3ab4919abcc04bdee93144958", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2920,8 +5115,8 @@ }, "assertionUrn": "urn:li:assertion:60ce4aad7ff6dbff7004da0f2258c9df", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, diff --git a/metadata-ingestion/tests/integration/dbt/test_dbt.py b/metadata-ingestion/tests/integration/dbt/test_dbt.py index a46da9707679c7..d213cffa78045e 100644 --- a/metadata-ingestion/tests/integration/dbt/test_dbt.py +++ b/metadata-ingestion/tests/integration/dbt/test_dbt.py @@ -227,7 +227,7 @@ def set_paths( source_config_modifiers={ "prefer_sql_parser_lineage": True, "skip_sources_in_lineage": True, - "entities_enabled": {"sources": "NO"}, + # "entities_enabled": {"sources": "NO"}, }, ), ], diff --git a/metadata-ingestion/tests/integration/fivetran/test_fivetran.py b/metadata-ingestion/tests/integration/fivetran/test_fivetran.py index 887dcce4b7e9b9..5e0e20234cc992 100644 --- a/metadata-ingestion/tests/integration/fivetran/test_fivetran.py +++ b/metadata-ingestion/tests/integration/fivetran/test_fivetran.py @@ -9,6 +9,7 @@ from datahub.configuration.common import ConfigurationWarning from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.run.pipeline import Pipeline +from datahub.ingestion.source.bigquery_v2.bigquery_config import BigQueryCredential from datahub.ingestion.source.fivetran.config import ( 
BigQueryDestinationConfig, FivetranSourceConfig, @@ -16,7 +17,6 @@ ) from datahub.ingestion.source.fivetran.fivetran import FivetranSource from datahub.ingestion.source.fivetran.fivetran_query import FivetranLogQuery -from datahub.ingestion.source_config.usage.bigquery_usage import BigQueryCredential from tests.test_helpers import mce_helpers FROZEN_TIME = "2022-06-07 17:00:00" diff --git a/metadata-ingestion/tests/integration/looker/golden_test_folder_path_pattern_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_folder_path_pattern_ingest.json new file mode 100644 index 00000000000000..5bee9c4adec8d4 --- /dev/null +++ b/metadata-ingestion/tests/integration/looker/golden_test_folder_path_pattern_ingest.json @@ -0,0 +1,887 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:13edab8c7af69549d8fc4ab3b7d87e7c", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "looker", + "env": "PROD", + "folder_id": "a" + }, + "name": "A" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:13edab8c7af69549d8fc4ab3b7d87e7c", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:13edab8c7af69549d8fc4ab3b7d87e7c", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:13edab8c7af69549d8fc4ab3b7d87e7c", + 
"changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:13edab8c7af69549d8fc4ab3b7d87e7c", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Folders" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c91fd8c071064eb00f9a0a9bad69f2d1", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "looker", + "env": "PROD", + "folder_id": "b" + }, + "name": "B" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c91fd8c071064eb00f9a0a9bad69f2d1", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c91fd8c071064eb00f9a0a9bad69f2d1", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c91fd8c071064eb00f9a0a9bad69f2d1", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 
1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c91fd8c071064eb00f9a0a9bad69f2d1", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Folders" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ba4628721936d16f4066d86b414e8891", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "looker", + "env": "PROD", + "folder_id": "c" + }, + "name": "C" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ba4628721936d16f4066d86b414e8891", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ba4628721936d16f4066d86b414e8891", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ba4628721936d16f4066d86b414e8891", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": 
"urn:li:container:ba4628721936d16f4066d86b414e8891", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Folders" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DashboardSnapshot": { + "urn": "urn:li:dashboard:(looker,dashboards.3)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dashboard.DashboardInfo": { + "customProperties": {}, + "title": "third dashboard", + "description": "third", + "charts": [], + "datasets": [], + "lastModified": { + "created": { + "time": 1586847600000, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1586847600000, + "actor": "urn:li:corpuser:unknown" + } + }, + "dashboardUrl": "https://looker.company.com/dashboards/3" + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Folders/A/B/C" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(looker,dashboards.3)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ba4628721936d16f4066d86b414e8891" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(looker,dashboards.3)", + "changeType": "UPSERT", + "aspectName": "embed", + "aspect": { + "json": { + "renderUrl": "https://looker.company.com/embed/dashboards/3" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + 
"entityUrn": "urn:li:dashboard:(looker,dashboards.3)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Folders" + }, + { + "id": "urn:li:container:13edab8c7af69549d8fc4ab3b7d87e7c", + "urn": "urn:li:container:13edab8c7af69549d8fc4ab3b7d87e7c" + }, + { + "id": "urn:li:container:c91fd8c071064eb00f9a0a9bad69f2d1", + "urn": "urn:li:container:c91fd8c071064eb00f9a0a9bad69f2d1" + }, + { + "id": "urn:li:container:ba4628721936d16f4066d86b414e8891", + "urn": "urn:li:container:ba4628721936d16f4066d86b414e8891" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,dashboard_elements.2)", + "changeType": "UPSERT", + "aspectName": "inputFields", + "aspect": { + "json": { + "fields": [ + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(looker,dashboard_elements.2),calc)", + "schemaField": { + "fieldPath": "calc", + "nullable": false, + "description": "", + "label": "foobar", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD),dim1)", + "schemaField": { + "fieldPath": "dim1", + "nullable": false, + "description": "dimension one description", + "label": "Dimensions One Label", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(looker,dashboards.3)", + "changeType": 
"UPSERT", + "aspectName": "inputFields", + "aspect": { + "json": { + "fields": [ + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(looker,dashboard_elements.2),calc)", + "schemaField": { + "fieldPath": "calc", + "nullable": false, + "description": "", + "label": "foobar", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD),dim1)", + "schemaField": { + "fieldPath": "dim1", + "nullable": false, + "description": "dimension one description", + "label": "Dimensions One Label", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "looker", + "env": "PROD", + "model_name": "data" + }, + "name": "data" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", + "changeType": "UPSERT", + "aspectName": 
"dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "LookML Model" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Explore" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Explore/data" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "looker.explore.label": "My Explore View", + "looker.explore.file": "test_source_file.lkml" + }, + "externalUrl": "https://looker.company.com/explore/data/my_view", + "name": "My Explore View", + "description": "lorem ipsum", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.underlying_view,PROD)", + "type": "VIEW" + } + ] + } + }, + { + 
"com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "my_view", + "platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "dim1", + "nullable": false, + "description": "dimension one description", + "label": "Dimensions One Label", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + } + ], + "primaryKeys": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Explore" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "changeType": "UPSERT", + "aspectName": "embed", + "aspect": { + "json": { + "renderUrl": "https://looker.company.com/embed/explore/data/my_view" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:59a5aa45397364e6882e793f1bc77b42" + } + }, + 
"systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Explore" + }, + { + "id": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", + "urn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { + "urn": "urn:li:tag:Dimension", + "aspects": [ + { + "com.linkedin.pegasus2avro.tag.TagProperties": { + "name": "Dimension", + "description": "A tag that is applied to all dimension fields." + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { + "urn": "urn:li:tag:Temporal", + "aspects": [ + { + "com.linkedin.pegasus2avro.tag.TagProperties": { + "name": "Temporal", + "description": "A tag that is applied to all time-based (temporal) fields such as timestamps or durations." + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { + "urn": "urn:li:tag:Measure", + "aspects": [ + { + "com.linkedin.pegasus2avro.tag.TagProperties": { + "name": "Measure", + "description": "A tag that is applied to all measures (metrics). 
Measures are typically the columns that you aggregate on" + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,dashboard_elements.2)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Dimension", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Measure", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Temporal", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/looker/test_looker.py b/metadata-ingestion/tests/integration/looker/test_looker.py index fdc9c45fcf5396..dfda2fedf877c7 100644 --- a/metadata-ingestion/tests/integration/looker/test_looker.py +++ b/metadata-ingestion/tests/integration/looker/test_looker.py @@ -1,7 +1,7 @@ import json import time from datetime import datetime -from typing import Any, Dict, List, Optional, Union, cast +from typing import Any, Dict, List, Optional, Sequence, Tuple, Union, cast from unittest import mock import pytest @@ -12,6 
+12,7 @@ Category, Dashboard, DashboardElement, + Folder, FolderBase, Look, LookmlModelExplore, @@ -1133,3 +1134,147 @@ def test_explore_tags(pytestconfig, tmp_path, mock_time, mock_datahub_graph): ] assert expected_tag_urns == actual_tag_urns + + +def side_effect_function_for_dashboards(*args: Tuple[str], **kwargs: Any) -> Dashboard: + assert kwargs["dashboard_id"] in ["1", "2", "3"], "Invalid dashboard id" + + if kwargs["dashboard_id"] == "1": + return Dashboard( + id=kwargs["dashboard_id"], + title="first dashboard", + created_at=datetime.utcfromtimestamp(time.time()), + updated_at=datetime.utcfromtimestamp(time.time()), + description="first", + folder=FolderBase(name="A", id="a"), + dashboard_elements=[ + DashboardElement( + id="2", + type="", + subtitle_text="Some text", + query=Query( + model="data", + view="my_view", + fields=["dim1"], + dynamic_fields='[{"table_calculation":"calc","label":"foobar","expression":"offset(${my_table.value},1)","value_format":null,"value_format_name":"eur","_kind_hint":"measure","_type_hint":"number"}]', + ), + ) + ], + ) + + if kwargs["dashboard_id"] == "2": + return Dashboard( + id=kwargs["dashboard_id"], + title="second dashboard", + created_at=datetime.utcfromtimestamp(time.time()), + updated_at=datetime.utcfromtimestamp(time.time()), + description="second", + folder=FolderBase(name="B", id="b"), + dashboard_elements=[ + DashboardElement( + id="2", + type="", + subtitle_text="Some text", + query=Query( + model="data", + view="my_view", + fields=["dim1"], + dynamic_fields='[{"table_calculation":"calc","label":"foobar","expression":"offset(${my_table.value},1)","value_format":null,"value_format_name":"eur","_kind_hint":"measure","_type_hint":"number"}]', + ), + ) + ], + ) + + if kwargs["dashboard_id"] == "3": + return Dashboard( + id=kwargs["dashboard_id"], + title="third dashboard", + created_at=datetime.utcfromtimestamp(time.time()), + updated_at=datetime.utcfromtimestamp(time.time()), + description="third", + 
folder=FolderBase(name="C", id="c"), + dashboard_elements=[ + DashboardElement( + id="2", + type="", + subtitle_text="Some text", + query=Query( + model="data", + view="my_view", + fields=["dim1"], + dynamic_fields='[{"table_calculation":"calc","label":"foobar","expression":"offset(${my_table.value},1)","value_format":null,"value_format_name":"eur","_kind_hint":"measure","_type_hint":"number"}]', + ), + ) + ], + ) + + # Default return to satisfy the linter + return Dashboard( + id="unknown", + title="unknown", + created_at=datetime.utcfromtimestamp(time.time()), + updated_at=datetime.utcfromtimestamp(time.time()), + description="unknown", + folder=FolderBase(name="Unknown", id="unknown"), + dashboard_elements=[], + ) + + +def side_effect_function_folder_ancestors( + *args: Tuple[Any], **kwargs: Any +) -> Sequence[Folder]: + assert args[0] in ["a", "b", "c"], "Invalid folder id" + + if args[0] == "a": + # No parent + return () + + if args[0] == "b": + return (Folder(id="a", name="A"),) + + if args[0] == "c": + return Folder(id="a", name="A"), Folder(id="b", name="B") + + # Default return to satisfy the linter + return (Folder(id="unknown", name="Unknown"),) + + +def setup_mock_dashboard_with_folder(mocked_client): + mocked_client.all_dashboards.return_value = [ + Dashboard(id="1"), + Dashboard(id="2"), + Dashboard(id="3"), + ] + mocked_client.dashboard.side_effect = side_effect_function_for_dashboards + mocked_client.folder_ancestors.side_effect = side_effect_function_folder_ancestors + + +@freeze_time(FROZEN_TIME) +def test_folder_path_pattern(pytestconfig, tmp_path, mock_time, mock_datahub_graph): + mocked_client = mock.MagicMock() + new_recipe = get_default_recipe(output_file_path=f"{tmp_path}/looker_mces.json") + new_recipe["source"]["config"]["folder_path_pattern"] = { + "allow": ["A/B/C"], + } + + with mock.patch("looker_sdk.init40") as mock_sdk: + mock_sdk.return_value = mocked_client + + setup_mock_dashboard_with_folder(mocked_client) + + 
setup_mock_explore(mocked_client) + + setup_mock_look(mocked_client) + + test_resources_dir = pytestconfig.rootpath / "tests/integration/looker" + + pipeline = Pipeline.create(new_recipe) + pipeline.run() + pipeline.raise_from_status() + mce_out_file = "golden_test_folder_path_pattern_ingest.json" + + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / "looker_mces.json", + golden_path=f"{test_resources_dir}/{mce_out_file}", + ) diff --git a/metadata-ingestion/tests/integration/lookml/drop_hive_dot/data.model.lkml b/metadata-ingestion/tests/integration/lookml/drop_hive_dot/data.model.lkml new file mode 100644 index 00000000000000..95391f6a73e635 --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/drop_hive_dot/data.model.lkml @@ -0,0 +1,6 @@ +connection: "my_connection" + +include: "top_10_employee_income_source.view.lkml" + +explore: top_10_employee_income_source { +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/drop_hive_dot/top_10_employee_income_source.view.lkml b/metadata-ingestion/tests/integration/lookml/drop_hive_dot/top_10_employee_income_source.view.lkml new file mode 100644 index 00000000000000..149ce9219b54b8 --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/drop_hive_dot/top_10_employee_income_source.view.lkml @@ -0,0 +1,26 @@ +view: top_10_employee_income_source { + derived_table: { + sql: SELECT id, + name, + source + FROM hive.employee_db.income_source + ORDER BY source desc + LIMIT 10 + ;; + } + + dimension: id { + type: number + sql: ${TABLE}.id ;; + } + + dimension: name { + type: string + sql: ${TABLE}.name ;; + } + + dimension: source { + type: string + sql: ${TABLE}.source ;; + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/drop_hive_dot_golden.json b/metadata-ingestion/tests/integration/lookml/drop_hive_dot_golden.json new file mode 100644 index 00000000000000..e1dad2e91b7353 --- /dev/null +++ 
b/metadata-ingestion/tests/integration/lookml/drop_hive_dot_golden.json @@ -0,0 +1,357 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "looker", + "env": "PROD", + "project_name": "lkml_samples" + }, + "name": "lkml_samples" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "LookML Project" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Folders" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "SELECT id,\n name,\n source\n FROM hive.employee_db.income_source\n ORDER BY source desc\n LIMIT 10", + "viewLanguage": "sql" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Develop/lkml_samples/" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,employee_db.income_source,PROD)", + "type": "VIEW" + } 
+ ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,employee_db.income_source,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,employee_db.income_source,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD),name)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,employee_db.income_source,PROD),source)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD),source)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "top_10_employee_income_source", + "platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "id", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "number", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, + "description": "", + 
"label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "source", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + } + ], + "primaryKeys": [] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "looker.file.path": "top_10_employee_income_source.view.lkml", + "looker.model": "data" + }, + "name": "top_10_employee_income_source", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Dimension", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "Dimension" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/refinement_include_order_golden.json 
b/metadata-ingestion/tests/integration/lookml/refinement_include_order_golden.json index 70f48953a06adb..c5b1d44772deab 100644 --- a/metadata-ingestion/tests/integration/lookml/refinement_include_order_golden.json +++ b/metadata-ingestion/tests/integration/lookml/refinement_include_order_golden.json @@ -485,9 +485,195 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,.public.book,PROD)", "type": "VIEW" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.book,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.extend_book,PROD),name)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.book,PROD),date)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.extend_book,PROD),date)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.book,PROD),date)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.extend_book,PROD),issue_date)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.book,PROD),date)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.extend_book,PROD),issue_date_3)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.book,PROD),count)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.extend_book,PROD),count)" + ], + "confidenceScore": 1.0 + } ] } }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "extend_book", + "platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "name", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "date", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "issue_date", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "number", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "issue_date_3", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "number", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": 
"urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "count", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "count", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Measure" + } + ] + }, + "isPartOfKey": false + } + ], + "primaryKeys": [] + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { diff --git a/metadata-ingestion/tests/integration/lookml/test_lookml.py b/metadata-ingestion/tests/integration/lookml/test_lookml.py index 9e051995d0b940..a5d838cb16d73a 100644 --- a/metadata-ingestion/tests/integration/lookml/test_lookml.py +++ b/metadata-ingestion/tests/integration/lookml/test_lookml.py @@ -1032,3 +1032,30 @@ def test_field_tag_ingest(pytestconfig, tmp_path, mock_time): output_path=tmp_path / mce_out_file, golden_path=golden_path, ) + + +@freeze_time(FROZEN_TIME) +def test_drop_hive(pytestconfig, tmp_path, mock_time): + test_resources_dir = pytestconfig.rootpath / "tests/integration/lookml" + mce_out_file = "drop_hive_dot.json" + + new_recipe = get_default_recipe( + f"{tmp_path}/{mce_out_file}", + f"{test_resources_dir}/drop_hive_dot", + ) + + new_recipe["source"]["config"]["connection_to_platform_map"] = { + "my_connection": "hive" + } + + pipeline = Pipeline.create(new_recipe) + pipeline.run() + pipeline.pretty_print_summary() + pipeline.raise_from_status(raise_warnings=True) + + golden_path = test_resources_dir / "drop_hive_dot_golden.json" + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / mce_out_file, + golden_path=golden_path, + ) diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/child_view.view.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/child_view.view.lkml new file mode 100644 index 00000000000000..5d8b51527b0fe7 --- /dev/null +++ 
b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/child_view.view.lkml @@ -0,0 +1,16 @@ +include: "parent_view.view.lkml" + +view: child_view { + extends: [parent_view] + + dimension: id { + primary_key: yes + type: integer + sql: ${TABLE}.id ;; + } + + dimension: child_dimension_1 { + type: string + sql: ${TABLE}.child_dimension_1 ;; + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml index 2cc6ae994d245b..d570e0ecdb5b22 100644 --- a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml +++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml @@ -6,7 +6,10 @@ include: "employee_total_income.view.lkml" include: "top_10_employee_income_source.view.lkml" include: "employee_tax_report.view.lkml" include: "employee_salary_rating.view.lkml" +include: "environment_activity_logs.view.lkml" +include: "employee_income_source_as_per_env.view.lkml" include: "rent_as_employee_income_source.view.lkml" +include: "child_view.view.lkml" explore: activity_logs { } @@ -26,5 +29,14 @@ explore: employee_tax_report { explore: employee_salary_rating { } +explore: environment_activity_logs { +} + +explore: employee_income_source_as_per_env { +} + explore: rent_as_employee_income_source { +} + +explore: child_view { } \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/employee_income_source_as_per_env.view.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/employee_income_source_as_per_env.view.lkml new file mode 100644 index 00000000000000..4b8e0dd46a8ce3 --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/employee_income_source_as_per_env.view.lkml @@ -0,0 +1,40 
@@ +view: employee_income_source_as_per_env { + derived_table: { + sql: SELECT + employee_id, + employee_name, + {% if dw_eff_dt_date._is_selected or finance_dw_eff_dt_date._is_selected %} + prod_core.data.r_metric_summary_v2 + {% elsif dw_eff_dt_week._is_selected or finance_dw_eff_dt_week._is_selected %} + prod_core.data.r_metric_summary_v3 + {% else %} + 'default_table' as source + {% endif %}, + employee_income + FROM -- if dev -- dev_income_source -- if prod -- prod_income_source + WHERE + {% condition source_region %} source_table.region {% endcondition %} + ;; + } + + dimension: id { + type: number + sql: ${TABLE}.employee_id;; + } + + dimension: name { + type: string + sql: ${TABLE}.employee_name;; + } + + dimension: source { + type: string + sql: ${TABLE}.source ;; + } + + dimension: income { + type: number + sql: ${TABLE}.employee_income ;; + } + +} diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/environment_activity_logs.view.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/environment_activity_logs.view.lkml new file mode 100644 index 00000000000000..efc7ba82754b88 --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/environment_activity_logs.view.lkml @@ -0,0 +1,12 @@ +view: environment_activity_logs { + sql_table_name: -- if prod -- prod.staging_app.stg_app__activity_logs + -- if dev -- {{ _user_attributes['dev_database_prefix'] }}analytics.{{ _user_attributes['dev_schema_prefix'] }}staging_app.stg_app__activity_logs + ;; + + dimension: generated_message_id { + group_label: "IDs" + primary_key: yes + type: number + sql: ${TABLE}."GENERATED_MESSAGE_ID" ;; + } +} diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/parent_view.view.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/parent_view.view.lkml new file mode 100644 index 00000000000000..c2f18924351c29 --- 
/dev/null +++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/parent_view.view.lkml @@ -0,0 +1,18 @@ +view: parent_view { + sql_table_name: `dataset.table` ;; + + dimension: id { + primary_key: yes + type: string + sql: ${TABLE}.id ;; + } + + dimension: parent_dimension_1 { + type: string + sql: ${TABLE}.parent_dimension_1 ;; + } + + measure: parent_count { + type: count + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json b/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json index 2e55971b65bd43..dd6917c112579e 100644 --- a/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json +++ b/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json @@ -1582,7 +1582,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.environment_activity_logs,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1600,13 +1600,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.environment_activity_logs,PROD)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { "json": { "materialized": false, - "viewLogic": "view: rent_as_employee_income_source {\n sql_table_name: (\n SELECT id,\n name,\n source\n FROM ${employee_income_source.SQL_TABLE_NAME}\n WHERE source = \"RENT\"\n ORDER BY source desc\n LIMIT 10\n );;\n\n\n dimension: id {\n type: number\n sql: ${TABLE}.id ;;\n }\n\n dimension: name {\n type: string\n sql: ${TABLE}.name ;;\n }\n\n dimension: source {\n type: string\n sql: ${TABLE}.source ;;\n }\n}", + 
"viewLogic": "view: environment_activity_logs {\n sql_table_name: -- if prod -- prod.staging_app.stg_app__activity_logs\n -- if dev -- {{ _user_attributes['dev_database_prefix'] }}analytics.{{ _user_attributes['dev_schema_prefix'] }}staging_app.stg_app__activity_logs\n ;;\n\n dimension: generated_message_id {\n group_label: \"IDs\"\n primary_key: yes\n type: number\n sql: ${TABLE}.\"GENERATED_MESSAGE_ID\" ;;\n }\n}\n", "viewLanguage": "lookml" } }, @@ -1618,7 +1618,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.environment_activity_logs,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -1635,7 +1635,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.environment_activity_logs,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { @@ -1657,7 +1657,7 @@ "time": 1586847600000, "actor": "urn:li:corpuser:datahub" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,prod.staging_app.stg_app__activity_logs,PROD)", "type": "VIEW" } ], @@ -1665,33 +1665,230 @@ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source,PROD),id)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,prod.staging_app.stg_app__activity_logs,PROD),generated_message_id)" ], "downstreamType": "FIELD", "downstreams": [ - 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD),id)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.environment_activity_logs,PROD),generated_message_id)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "environment_activity_logs", + "platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "generated_message_id", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "number", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": true + } + ], + "primaryKeys": [ + "generated_message_id" + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "looker.file.path": "environment_activity_logs.view.lkml", + "looker.model": "data" + }, + "name": "environment_activity_logs", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.environment_activity_logs,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + 
"runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source_as_per_env,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source_as_per_env,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "SELECT\n employee_id,\n employee_name,\n {% if dw_eff_dt_date._is_selected or finance_dw_eff_dt_date._is_selected %}\n prod_core.data.r_metric_summary_v2\n {% elsif dw_eff_dt_week._is_selected or finance_dw_eff_dt_week._is_selected %}\n prod_core.data.r_metric_summary_v3\n {% else %}\n 'default_table' as source\n {% endif %},\n employee_income\n FROM -- if dev -- dev_income_source -- if prod -- prod_income_source\n WHERE\n {% condition source_region %} source_table.region {% endcondition %}", + "viewLanguage": "sql" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source_as_per_env,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source_as_per_env,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Develop/lkml_samples/" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,prod_income_source,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,prod_income_source,PROD),employee_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source_as_per_env,PROD),id)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source,PROD),name)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,prod_income_source,PROD),employee_name)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD),name)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source_as_per_env,PROD),name)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source,PROD),source)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,prod_income_source,PROD),source)" ], "downstreamType": "FIELD", "downstreams": [ - 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD),source)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source_as_per_env,PROD),source)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,prod_income_source,PROD),employee_income)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source_as_per_env,PROD),income)" ], "confidenceScore": 1.0 } @@ -1700,7 +1897,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "rent_as_employee_income_source", + "schemaName": "employee_income_source_as_per_env", "platform": "urn:li:dataPlatform:looker", "version": 0, "created": { @@ -1780,6 +1977,27 @@ ] }, "isPartOfKey": false + }, + { + "fieldPath": "income", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "number", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false } ], "primaryKeys": [] @@ -1788,10 +2006,10 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "looker.file.path": "rent_as_employee_income_source.view.lkml", + "looker.file.path": "employee_income_source_as_per_env.view.lkml", "looker.model": "data" }, - "name": "rent_as_employee_income_source", + "name": "employee_income_source_as_per_env", "tags": [] } } @@ -1804,10 +2022,790 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source_as_per_env,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + 
"path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD)", "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "view: rent_as_employee_income_source {\n sql_table_name: (\n SELECT id,\n name,\n source\n FROM ${employee_income_source.SQL_TABLE_NAME}\n WHERE source = \"RENT\"\n ORDER BY source desc\n LIMIT 10\n );;\n\n\n dimension: id {\n type: number\n sql: ${TABLE}.id ;;\n }\n\n dimension: name {\n type: string\n sql: ${TABLE}.name ;;\n }\n\n dimension: source {\n type: string\n sql: ${TABLE}.source ;;\n }\n}", + "viewLanguage": "lookml" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + 
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Develop/lkml_samples/" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD),name)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source,PROD),source)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD),source)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "rent_as_employee_income_source", + 
"platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "id", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "number", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "source", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + } + ], + "primaryKeys": [] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "looker.file.path": "rent_as_employee_income_source.view.lkml", + "looker.model": "data" + }, + "name": "rent_as_employee_income_source", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" 
+ }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.parent_view,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.parent_view,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "view: parent_view {\n sql_table_name: `dataset.table` ;;\n\n dimension: id {\n primary_key: yes\n type: string\n sql: ${TABLE}.id ;;\n }\n\n dimension: parent_dimension_1 {\n type: string\n sql: ${TABLE}.parent_dimension_1 ;;\n }\n\n measure: parent_count {\n type: count\n }\n}", + "viewLanguage": "lookml" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.parent_view,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.parent_view,PROD)", + "aspects": [ + { + 
"com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Develop/lkml_samples/" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,.dataset.table,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,.dataset.table,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.parent_view,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,.dataset.table,PROD),parent_dimension_1)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.parent_view,PROD),parent_dimension_1)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,.dataset.table,PROD),parent_count)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.parent_view,PROD),parent_count)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "parent_view", + "platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { 
+ "fieldPath": "id", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": true + }, + { + "fieldPath": "parent_dimension_1", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "parent_count", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "count", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Measure" + } + ] + }, + "isPartOfKey": false + } + ], + "primaryKeys": [ + "id" + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "looker.file.path": "parent_view.view.lkml", + "looker.model": "data" + }, + "name": "parent_view", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.parent_view,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.child_view,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.child_view,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "include: \"parent_view.view.lkml\"\n\nview: child_view {\n extends: [parent_view]\n\n dimension: id {\n primary_key: yes\n type: integer\n sql: ${TABLE}.id ;;\n }\n\n dimension: child_dimension_1 {\n type: string\n sql: ${TABLE}.child_dimension_1 ;;\n }\n}", + "viewLanguage": "lookml" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.child_view,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.child_view,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Develop/lkml_samples/" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub" + }, + "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:postgres,.dataset.table,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,.dataset.table,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.child_view,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,.dataset.table,PROD),child_dimension_1)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.child_view,PROD),child_dimension_1)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,.dataset.table,PROD),parent_dimension_1)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.child_view,PROD),parent_dimension_1)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,.dataset.table,PROD),parent_count)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.child_view,PROD),parent_count)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "child_view", + "platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ 
+ { + "fieldPath": "id", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": true + }, + { + "fieldPath": "child_dimension_1", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "parent_dimension_1", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "parent_count", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "count", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Measure" + } + ] + }, + "isPartOfKey": false + } + ], + "primaryKeys": [ + "id" + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "looker.file.path": "child_view.view.lkml", + "looker.model": "data" + }, + "name": "child_view", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.child_view,PROD)", + "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { diff --git 
a/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json b/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json index ec3fd80e6a6ea4..bba160984eed87 100644 --- a/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json +++ b/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json @@ -1,4 +1,59 @@ [ +{ + "entityType": "container", + "entityUrn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mongodb", + "instance": "instance", + "env": "PROD", + "database": "mngdb" + }, + "name": "mngdb" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mongodb", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", @@ -47,6 +102,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": 
{ + "container": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", @@ -64,6 +135,70 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Database" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + }, + { + "id": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "urn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", @@ -377,6 +512,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", @@ -4026,6 +4177,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", @@ -4046,6 +4213,31 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + }, + { + "id": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "urn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, 
{ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", @@ -4063,6 +4255,47 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + }, + { + "id": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "urn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)", @@ -4237,6 +4470,31 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + }, + { + "id": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "urn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + ] + } + }, + 
"systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", diff --git a/metadata-ingestion/tests/integration/mongodb/mongodb_mces_small_schema_size_golden.json b/metadata-ingestion/tests/integration/mongodb/mongodb_mces_small_schema_size_golden.json index 72b5fee49a0dbd..b2a1ba03dab768 100644 --- a/metadata-ingestion/tests/integration/mongodb/mongodb_mces_small_schema_size_golden.json +++ b/metadata-ingestion/tests/integration/mongodb/mongodb_mces_small_schema_size_golden.json @@ -1,4 +1,59 @@ [ +{ + "entityType": "container", + "entityUrn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mongodb", + "instance": "instance", + "env": "PROD", + "database": "mngdb" + }, + "name": "mngdb" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mongodb", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, { 
"entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", @@ -47,6 +102,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", @@ -64,6 +135,70 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Database" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)", + "urn": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + }, + { + "id": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "urn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", @@ -236,6 +371,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", @@ -405,6 +556,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", @@ -425,6 +592,31 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ 
+ { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + }, + { + "id": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "urn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", @@ -442,6 +634,47 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + }, + { + "id": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "urn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)", @@ -616,6 +849,31 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + }, + { + "id": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "urn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition.json index 51a8ef60849a9f..03d2ac38cf7d2b 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition.json @@ -7,9 +7,9 @@ "aspect": { "json": { "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv/part2.csv", + "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv/part3.csv", "number_of_files": "3", - "size_in_bytes": "3446" + "size_in_bytes": "3539" }, "name": "food_csv", "description": "", @@ -18,7 +18,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -58,18 +59,6 @@ "recursive": false, "isPartOfKey": false }, - { - "fieldPath": "healthy", - "nullable": false, - "type": { - "type": { - 
"com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "boolean", - "recursive": false, - "isPartOfKey": false - }, { "fieldPath": "height", "nullable": false, @@ -111,7 +100,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -123,8 +113,8 @@ "json": { "timestampMillis": 1615443388097, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "operationType": "UPDATE", "lastUpdatedTimestamp": 1688445089021 @@ -132,7 +122,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -152,7 +143,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -167,7 +159,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -182,7 +175,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -199,7 +193,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -214,7 +209,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -234,7 +230,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ 
-249,7 +246,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -264,7 +262,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -281,7 +280,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -296,7 +296,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -316,7 +317,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -336,7 +338,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -351,7 +354,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -366,7 +370,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -383,7 +388,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -398,7 +404,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -422,7 +429,8 @@ }, "systemMetadata": { "lastObserved": 
1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -442,7 +450,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -457,7 +466,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -472,7 +482,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -489,7 +500,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -504,7 +516,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -532,7 +545,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -552,7 +566,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -567,7 +582,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -582,7 +598,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -599,7 +616,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + 
"runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -614,7 +632,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -646,7 +665,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -666,7 +686,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -681,7 +702,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -696,7 +718,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -713,7 +736,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -728,7 +752,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -764,7 +789,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -784,7 +810,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -799,7 +826,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": 
"no-run-id-provided" } }, { @@ -814,7 +842,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -831,7 +860,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -846,7 +876,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -886,7 +917,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -906,7 +938,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -921,7 +954,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -936,7 +970,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -953,7 +988,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -968,7 +1004,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1012,7 +1049,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1027,7 +1065,8 @@ }, 
"systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1039,299 +1078,127 @@ "json": { "timestampMillis": 1615443388097, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, - "rowCount": 70, - "columnCount": 5, + "rowCount": 4, + "columnCount": 4, "fieldProfiles": [ { "fieldPath": "name", - "uniqueCount": 9, - "uniqueProportion": 0.13043478260869565, - "nullCount": 1, - "nullProportion": 0.014285714285714285, + "uniqueCount": 4, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, "distinctValueFrequencies": [ - { - "value": "NullValue", - "frequency": 1 - }, { "value": "apple", - "frequency": 7 - }, - { - "value": "chicken", - "frequency": 7 + "frequency": 1 }, { "value": "cookie", - "frequency": 6 - }, - { - "value": "hamburger", - "frequency": 7 + "frequency": 1 }, { "value": "lasagna", - "frequency": 7 - }, - { - "value": "orange", - "frequency": 7 + "frequency": 1 }, { "value": "pasta", - "frequency": 7 - }, - { - "value": "spinach", - "frequency": 7 - }, - { - "value": "sushi", - "frequency": 7 - }, - { - "value": "water", - "frequency": 7 + "frequency": 1 } ], "sampleValues": [ "apple", - "apple", - "apple", - "chicken", - "cookie", "cookie", - "cookie", - "lasagna", "lasagna", - "orange", - "orange", - "pasta", - "pasta", - "pasta", - "pasta", - "spinach", - "spinach", - "spinach", - "water", - "water" + "pasta" ] }, { "fieldPath": "weight", - "uniqueCount": 9, - "uniqueProportion": 0.12857142857142856, + "uniqueCount": 4, + "uniqueProportion": 1.0, "nullCount": 0, "nullProportion": 0.0, "distinctValueFrequencies": [ - { - "value": "10", - "frequency": 7 - }, - { - "value": "2", - "frequency": 7 - }, { "value": "23", - "frequency": 7 - }, - { - "value": "32", - "frequency": 7 - }, - { - "value": "36", - "frequency": 7 - }, - { - 
"value": "43", - "frequency": 7 + "frequency": 1 }, { "value": "49", - "frequency": 7 + "frequency": 1 }, { "value": "50", - "frequency": 7 - }, - { - "value": "53", - "frequency": 7 + "frequency": 1 }, { "value": "72", - "frequency": 7 + "frequency": 1 } ], "sampleValues": [ - "10", - "10", - "10", - "23", - "23", "23", - "32", - "32", - "36", - "43", - "43", - "49", "49", "50", - "50", - "50", - "72", - "72", - "72", "72" ] }, { "fieldPath": "height", - "uniqueCount": 4, - "uniqueProportion": 0.05714285714285714, + "uniqueCount": 1, + "uniqueProportion": 0.25, "nullCount": 0, "nullProportion": 0.0, "distinctValueFrequencies": [ { "value": "4", - "frequency": 24 - }, - { - "value": "5", - "frequency": 15 - }, - { - "value": "6", - "frequency": 23 - }, - { - "value": "7", - "frequency": 8 + "frequency": 4 } ], "sampleValues": [ "4", "4", "4", - "4", - "4", - "4", - "4", - "5", - "5", - "5", - "5", - "5", - "6", - "6", - "6", - "6", - "6", - "6", - "7", - "7" + "4" ] }, { "fieldPath": "color", - "uniqueCount": 7, - "uniqueProportion": 0.1, + "uniqueCount": 3, + "uniqueProportion": 0.75, "nullCount": 0, "nullProportion": 0.0, "distinctValueFrequencies": [ - { - "value": "blue", - "frequency": 7 - }, { "value": "brown", - "frequency": 14 - }, - { - "value": "green", - "frequency": 7 - }, - { - "value": "orange", - "frequency": 14 + "frequency": 1 }, { "value": "red", - "frequency": 14 - }, - { - "value": "white", - "frequency": 7 + "frequency": 2 }, { "value": "yellow", - "frequency": 7 + "frequency": 1 } ], "sampleValues": [ - "blue", - "blue", - "brown", - "brown", "brown", - "green", - "green", - "green", - "orange", - "orange", - "red", - "red", "red", "red", - "red", - "white", - "yellow", - "yellow", - "yellow", "yellow" ] - }, - { - "fieldPath": "healthy", - "uniqueCount": 2, - "uniqueProportion": 0.028985507246376812, - "nullCount": 1, - "nullProportion": 0.014285714285714285, - "sampleValues": [ - "False", - "False", - "False", - "False", - "False", - 
"False", - "False", - "False", - "False", - "None", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True" - ] } ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1379,7 +1246,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1401,7 +1269,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1494,7 +1363,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1506,8 +1376,8 @@ "json": { "timestampMillis": 1615443388097, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "operationType": "UPDATE", "lastUpdatedTimestamp": 1688445119021 @@ -1515,7 +1385,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1530,7 +1401,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1542,8 +1414,8 @@ "json": { "timestampMillis": 1615443388097, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "rowCount": 70, "columnCount": 5, @@ -1834,7 +1706,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + 
"lastRunId": "no-run-id-provided" } }, { @@ -1882,7 +1755,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1897,7 +1771,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1912,7 +1787,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_filename.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_filename.json index 8e67bbf10bd6dd..48c59753fcaff3 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_filename.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_filename.json @@ -7,9 +7,9 @@ "aspect": { "json": { "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv/part2.csv", + "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv/part3.csv", "number_of_files": "3", - "size_in_bytes": "3446" + "size_in_bytes": "3539" }, "name": "folder_aaa.food_csv", "description": "", @@ -18,7 +18,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -58,18 +59,6 @@ "recursive": false, "isPartOfKey": false }, - { - "fieldPath": "healthy", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": 
"boolean", - "recursive": false, - "isPartOfKey": false - }, { "fieldPath": "height", "nullable": false, @@ -111,7 +100,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -123,8 +113,8 @@ "json": { "timestampMillis": 1615443388097, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "operationType": "UPDATE", "lastUpdatedTimestamp": 1688445089021 @@ -132,7 +122,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -152,7 +143,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -167,7 +159,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -182,7 +175,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -199,7 +193,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -214,7 +209,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -234,7 +230,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": 
"folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -249,7 +246,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -264,7 +262,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -281,7 +280,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -296,7 +296,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -316,7 +317,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -336,7 +338,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -351,7 +354,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -366,7 +370,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -383,7 +388,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -398,7 
+404,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -422,7 +429,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -442,7 +450,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -457,7 +466,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -472,7 +482,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -489,7 +500,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -504,7 +516,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -532,7 +545,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -552,7 +566,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -567,7 +582,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": 
"folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -582,7 +598,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -599,7 +616,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -614,7 +632,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -646,7 +665,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -666,7 +686,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -681,7 +702,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -696,7 +718,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -713,7 +736,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -728,7 +752,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + 
"lastRunId": "no-run-id-provided" } }, { @@ -764,7 +789,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -784,7 +810,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -799,7 +826,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -814,7 +842,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -831,7 +860,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -846,7 +876,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -886,7 +917,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -906,7 +938,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -921,7 +954,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -936,7 +970,8 @@ }, "systemMetadata": { 
"lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -953,7 +988,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -968,7 +1004,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1012,7 +1049,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1027,7 +1065,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1039,299 +1078,127 @@ "json": { "timestampMillis": 1615443388097, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, - "rowCount": 70, - "columnCount": 5, + "rowCount": 4, + "columnCount": 4, "fieldProfiles": [ { "fieldPath": "name", - "uniqueCount": 9, - "uniqueProportion": 0.13043478260869565, - "nullCount": 1, - "nullProportion": 0.014285714285714285, + "uniqueCount": 4, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, "distinctValueFrequencies": [ - { - "value": "NullValue", - "frequency": 1 - }, { "value": "apple", - "frequency": 7 - }, - { - "value": "chicken", - "frequency": 7 + "frequency": 1 }, { "value": "cookie", - "frequency": 6 - }, - { - "value": "hamburger", - "frequency": 7 + "frequency": 1 }, { "value": "lasagna", - "frequency": 7 - }, - { - "value": "orange", - "frequency": 7 + "frequency": 1 }, { "value": "pasta", - "frequency": 
7 - }, - { - "value": "spinach", - "frequency": 7 - }, - { - "value": "sushi", - "frequency": 7 - }, - { - "value": "water", - "frequency": 7 + "frequency": 1 } ], "sampleValues": [ "apple", - "apple", - "apple", - "chicken", - "cookie", "cookie", - "cookie", - "lasagna", "lasagna", - "orange", - "orange", - "pasta", - "pasta", - "pasta", - "pasta", - "spinach", - "spinach", - "spinach", - "water", - "water" + "pasta" ] }, { "fieldPath": "weight", - "uniqueCount": 9, - "uniqueProportion": 0.12857142857142856, + "uniqueCount": 4, + "uniqueProportion": 1.0, "nullCount": 0, "nullProportion": 0.0, "distinctValueFrequencies": [ - { - "value": "10", - "frequency": 7 - }, - { - "value": "2", - "frequency": 7 - }, { "value": "23", - "frequency": 7 - }, - { - "value": "32", - "frequency": 7 - }, - { - "value": "36", - "frequency": 7 - }, - { - "value": "43", - "frequency": 7 + "frequency": 1 }, { "value": "49", - "frequency": 7 + "frequency": 1 }, { "value": "50", - "frequency": 7 - }, - { - "value": "53", - "frequency": 7 + "frequency": 1 }, { "value": "72", - "frequency": 7 + "frequency": 1 } ], "sampleValues": [ - "10", - "10", - "10", - "23", - "23", "23", - "32", - "32", - "36", - "43", - "43", - "49", "49", "50", - "50", - "50", - "72", - "72", - "72", "72" ] }, { "fieldPath": "height", - "uniqueCount": 4, - "uniqueProportion": 0.05714285714285714, + "uniqueCount": 1, + "uniqueProportion": 0.25, "nullCount": 0, "nullProportion": 0.0, "distinctValueFrequencies": [ { "value": "4", - "frequency": 24 - }, - { - "value": "5", - "frequency": 15 - }, - { - "value": "6", - "frequency": 23 - }, - { - "value": "7", - "frequency": 8 + "frequency": 4 } ], "sampleValues": [ "4", "4", "4", - "4", - "4", - "4", - "4", - "5", - "5", - "5", - "5", - "5", - "6", - "6", - "6", - "6", - "6", - "6", - "7", - "7" + "4" ] }, { "fieldPath": "color", - "uniqueCount": 7, - "uniqueProportion": 0.1, + "uniqueCount": 3, + "uniqueProportion": 0.75, "nullCount": 0, "nullProportion": 0.0, 
"distinctValueFrequencies": [ - { - "value": "blue", - "frequency": 7 - }, { "value": "brown", - "frequency": 14 - }, - { - "value": "green", - "frequency": 7 - }, - { - "value": "orange", - "frequency": 14 + "frequency": 1 }, { "value": "red", - "frequency": 14 - }, - { - "value": "white", - "frequency": 7 + "frequency": 2 }, { "value": "yellow", - "frequency": 7 + "frequency": 1 } ], "sampleValues": [ - "blue", - "blue", - "brown", - "brown", "brown", - "green", - "green", - "green", - "orange", - "orange", - "red", - "red", "red", "red", - "red", - "white", - "yellow", - "yellow", - "yellow", "yellow" ] - }, - { - "fieldPath": "healthy", - "uniqueCount": 2, - "uniqueProportion": 0.028985507246376812, - "nullCount": 1, - "nullProportion": 0.014285714285714285, - "sampleValues": [ - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "None", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True" - ] } ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1379,7 +1246,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1401,7 +1269,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1494,7 +1363,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1506,8 +1376,8 @@ "json": { "timestampMillis": 1615443388097, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + 
"partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "operationType": "UPDATE", "lastUpdatedTimestamp": 1688445119021 @@ -1515,7 +1385,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1530,7 +1401,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1542,8 +1414,8 @@ "json": { "timestampMillis": 1615443388097, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "rowCount": 70, "columnCount": 5, @@ -1834,7 +1706,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1882,7 +1755,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1897,7 +1771,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1912,7 +1787,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_update_schema_with_partition_autodetect.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_update_schema_with_partition_autodetect.json new file mode 100644 index 
00000000000000..adb3686309e6c4 --- /dev/null +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_update_schema_with_partition_autodetect.json @@ -0,0 +1,2572 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv/part3.csv", + "number_of_files": "3", + "size_in_bytes": "3539" + }, + "name": "folder_aaa.food_csv", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "folder_aaa.food_csv", + "platform": "urn:li:dataPlatform:file", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "color", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "height", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, 
+ "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "weight", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586833420000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "file", + "env": "UAT", + "folder_abs_path": "tests" + }, + "name": "tests" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": 
"folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "file", + "env": "UAT", + "folder_abs_path": "tests/integration" + }, + "name": "integration" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + 
"removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + 
"entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "file", + "env": "UAT", + "folder_abs_path": "tests/integration/s3" + }, + "name": "s3" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": 
"folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + }, + { + "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "file", + "env": "UAT", + "folder_abs_path": "tests/integration/s3/test_data" + }, + "name": "test_data" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": 
"folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + }, + { + "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + }, + { + "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "file", + "env": "UAT", + "folder_abs_path": 
"tests/integration/s3/test_data/local_system" + }, + "name": "local_system" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", + 
"changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + }, + { + "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + }, + { + "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" + }, + { + "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "file", + "env": "UAT", + "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a" + }, + "name": "folder_a" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file" + } + }, + "systemMetadata": { + 
"lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:93525defb812252106d3b0c08a55e39a" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + }, + { + "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + }, + { + "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" + }, + { + "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" + }, + { + "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "file", + "env": "UAT", + "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a/folder_aa" + }, + "name": "folder_aa" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": 
"urn:li:container:48a8653fc4afb55b12cd8d0280e09156" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + }, + { + "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + }, + { + "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" + }, + { + "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" + }, + { + "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" + }, + { + "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "file", + "env": "UAT", + "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa" + }, + "name": "folder_aaa" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", 
+ "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:98a716614da5246426edd48260406364" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + }, + { + "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "urn": 
"urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + }, + { + "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" + }, + { + "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" + }, + { + "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" + }, + { + "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" + }, + { + "id": "urn:li:container:98a716614da5246426edd48260406364", + "urn": "urn:li:container:98a716614da5246426edd48260406364" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "datasetProfile", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "rowCount": 4, + "columnCount": 4, + "fieldProfiles": [ + { + "fieldPath": "name", + "uniqueCount": 4, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": 
[ + { + "value": "apple", + "frequency": 1 + }, + { + "value": "cookie", + "frequency": 1 + }, + { + "value": "lasagna", + "frequency": 1 + }, + { + "value": "pasta", + "frequency": 1 + } + ], + "sampleValues": [ + "apple", + "cookie", + "lasagna", + "pasta" + ] + }, + { + "fieldPath": "weight", + "uniqueCount": 4, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "23", + "frequency": 1 + }, + { + "value": "49", + "frequency": 1 + }, + { + "value": "50", + "frequency": 1 + }, + { + "value": "72", + "frequency": 1 + } + ], + "sampleValues": [ + "23", + "49", + "50", + "72" + ] + }, + { + "fieldPath": "height", + "uniqueCount": 1, + "uniqueProportion": 0.25, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "4", + "frequency": 4 + } + ], + "sampleValues": [ + "4", + "4", + "4", + "4" + ] + }, + { + "fieldPath": "color", + "uniqueCount": 3, + "uniqueProportion": 0.75, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "brown", + "frequency": 1 + }, + { + "value": "red", + "frequency": 2 + }, + { + "value": "yellow", + "frequency": 1 + } + ], + "sampleValues": [ + "brown", + "red", + "red", + "yellow" + ] + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + }, + { + "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + }, + { + 
"id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" + }, + { + "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" + }, + { + "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" + }, + { + "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" + }, + { + "id": "urn:li:container:98a716614da5246426edd48260406364", + "urn": "urn:li:container:98a716614da5246426edd48260406364" + }, + { + "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", + "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet/part2.parquet", + "number_of_files": "2", + "size_in_bytes": "8412" + }, + "name": "folder_aaa.food_parquet", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": 
"folder_aaa.food_parquet", + "platform": "urn:li:dataPlatform:file", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "color", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "healthy", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BooleanType": {} + } + }, + "nativeDataType": "bool", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "height", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int64", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "weight", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int64", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586833440000 + } + 
}, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "datasetProfile", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "rowCount": 70, + "columnCount": 5, + "fieldProfiles": [ + { + "fieldPath": "name", + "uniqueCount": 9, + "uniqueProportion": 0.13043478260869565, + "nullCount": 1, + "nullProportion": 0.014285714285714285, + "distinctValueFrequencies": [ + { + "value": "NullValue", + "frequency": 1 + }, + { + "value": "apple", + "frequency": 7 + }, + { + "value": "chicken", + "frequency": 7 + }, + { + "value": "cookie", + "frequency": 6 + }, + { + "value": "hamburger", + "frequency": 7 + }, + { + "value": "lasagna", + "frequency": 7 + }, + { + "value": "orange", + "frequency": 7 + }, + { + "value": "pasta", + "frequency": 7 + }, + { + "value": "spinach", + "frequency": 7 + }, + { + "value": "sushi", + "frequency": 7 + }, + { + "value": "water", + "frequency": 7 + } + ], + "sampleValues": [ + "apple", + "apple", + "apple", + "chicken", + "cookie", + "cookie", + "cookie", + "lasagna", + "lasagna", + "orange", + "orange", 
+ "pasta", + "pasta", + "pasta", + "pasta", + "spinach", + "spinach", + "spinach", + "water", + "water" + ] + }, + { + "fieldPath": "weight", + "uniqueCount": 10, + "uniqueProportion": 0.14285714285714285, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "10", + "frequency": 7 + }, + { + "value": "2", + "frequency": 7 + }, + { + "value": "23", + "frequency": 7 + }, + { + "value": "32", + "frequency": 7 + }, + { + "value": "36", + "frequency": 7 + }, + { + "value": "43", + "frequency": 7 + }, + { + "value": "49", + "frequency": 7 + }, + { + "value": "50", + "frequency": 7 + }, + { + "value": "53", + "frequency": 7 + }, + { + "value": "72", + "frequency": 7 + } + ], + "sampleValues": [ + "10", + "10", + "10", + "23", + "23", + "23", + "32", + "32", + "36", + "43", + "43", + "49", + "49", + "50", + "50", + "50", + "72", + "72", + "72", + "72" + ] + }, + { + "fieldPath": "height", + "uniqueCount": 4, + "uniqueProportion": 0.05714285714285714, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "4", + "frequency": 24 + }, + { + "value": "5", + "frequency": 15 + }, + { + "value": "6", + "frequency": 23 + }, + { + "value": "7", + "frequency": 8 + } + ], + "sampleValues": [ + "4", + "4", + "4", + "4", + "4", + "4", + "4", + "5", + "5", + "5", + "5", + "5", + "6", + "6", + "6", + "6", + "6", + "6", + "7", + "7" + ] + }, + { + "fieldPath": "color", + "uniqueCount": 7, + "uniqueProportion": 0.1, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "blue", + "frequency": 7 + }, + { + "value": "brown", + "frequency": 14 + }, + { + "value": "green", + "frequency": 7 + }, + { + "value": "orange", + "frequency": 14 + }, + { + "value": "red", + "frequency": 14 + }, + { + "value": "white", + "frequency": 7 + }, + { + "value": "yellow", + "frequency": 7 + } + ], + "sampleValues": [ + "blue", + "blue", + "brown", + "brown", + "brown", + "green", + "green", + "green", + 
"orange", + "orange", + "red", + "red", + "red", + "red", + "red", + "white", + "yellow", + "yellow", + "yellow", + "yellow" + ] + }, + { + "fieldPath": "healthy", + "uniqueCount": 2, + "uniqueProportion": 0.028985507246376812, + "nullCount": 1, + "nullProportion": 0.014285714285714285, + "sampleValues": [ + "False", + "False", + "False", + "False", + "False", + "False", + "False", + "False", + "False", + "None", + "True", + "True", + "True", + "True", + "True", + "True", + "True", + "True", + "True", + "True" + ] + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + }, + { + "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + }, + { + "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" + }, + { + "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" + }, + { + "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" + }, + { + "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" + }, + { + "id": "urn:li:container:98a716614da5246426edd48260406364", + "urn": "urn:li:container:98a716614da5246426edd48260406364" + }, + { + "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", + "urn": 
"urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json", + "number_of_files": "13", + "size_in_bytes": "188600" + }, + "name": "folder_aaa.pokemon_abilities_json", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "folder_aaa.pokemon_abilities_json", + "platform": "urn:li:dataPlatform:file", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "effect_changes", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries", + "nullable": false, + "type": { + "type": { + 
"com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.effect", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language.is_native", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.version_group", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.version_group.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.version_group.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": 
"effect_entries", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.effect", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.language", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.language.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.short_effect", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.flavor_text", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.language", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.language.name", + 
"nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.version_group", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.version_group.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.version_group.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "generation", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "generation.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "generation.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "id", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "is_main_series", + "nullable": false, + "type": { + "type": { + 
"com.linkedin.schema.BooleanType": {} + } + }, + "nativeDataType": "bool", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names.language", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names.language.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.is_hidden", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BooleanType": {} + } + }, + "nativeDataType": "bool", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.pokemon", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": 
"pokemon.pokemon.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.pokemon.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.slot", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586833590000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", 
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "datasetProfile", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "rowCount": 1, + "columnCount": 9, + "fieldProfiles": [ + { + "fieldPath": "effect_changes", + "uniqueCount": 1, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "sampleValues": [ + "[Row(effect_entries=[Row(effect='Hat im Kampf keinen Effekt.', language=Row(is_native='no', name='de', url='https://pokeapi.co/api/v2/language/6/')), Row(effect='Has no effect in battle.', language=Row(is_native='yes', name='en', url='https://pokeapi.co/api/v2/language/9/'))], version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/'))]" + ] + }, + { + "fieldPath": "effect_entries", + "uniqueCount": 1, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "sampleValues": [ + "[Row(effect='Attacken die Schaden verursachen haben mit jedem Treffer eine 10% Chance das Ziel zur\u00fcckschrecken zu lassen, wenn die Attacke dies nicht bereits als Nebeneffekt hat.\\n\\nDer Effekt stapelt nicht mit dem von getragenen Items.\\n\\nAu\u00dferhalb vom Kampf: Wenn ein Pok\u00e9mon mit dieser F\u00e4higkeit an erster Stelle im Team steht, tauchen wilde Pok\u00e9mon nur halb so oft auf.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), short_effect='Mit jedem Treffer besteht eine 10% Chance das Ziel zur\u00fcckschrecken zu lassen.'), Row(effect=\"This Pok\u00e9mon's damaging moves have a 10% chance to make the target flinch with each hit if they do not already cause flinching as a secondary effect.\\n\\nThis ability does not stack with a held item.\\n\\nOverworld: The wild encounter rate is halved while this Pok\u00e9mon is first in the 
party.\", language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), short_effect='Has a 10% chance of making target Pok\u00e9mon flinch with each hit.')]" + ] + }, + { + "fieldPath": "flavor_text_entries", + "uniqueCount": 1, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "sampleValues": [ + "[Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='ruby-sapphire', url='https://pokeapi.co/api/v2/version-group/5/')), Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='emerald', url='https://pokeapi.co/api/v2/version-group/6/')), Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='firered-leafgreen', url='https://pokeapi.co/api/v2/version-group/7/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='diamond-pearl', url='https://pokeapi.co/api/v2/version-group/8/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='platinum', url='https://pokeapi.co/api/v2/version-group/9/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='heartgold-soulsilver', url='https://pokeapi.co/api/v2/version-group/10/')), Row(flavor_text='La puanteur peut\\neffrayer l\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', 
url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='black-2-white-2', url='https://pokeapi.co/api/v2/version-group/14/')), Row(flavor_text='\u304f\u3055\u304f\u3066\\u3000\u3042\u3044\u3066\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\uc545\ucde8 \ub54c\ubb38\uc5d0 \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='La puanteur peut effrayer\\nl\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='L\u00e4sst den Gegner durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='Es posible que el rival retroceda\\npor el mal olor.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='A volte il cattivo odore\\nfa tentennare i nemici.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='x-y', 
url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\u81ed\u304f\u3066\\u3000\u76f8\u624b\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\u304f\u3055\u304f\u3066\\u3000\u3042\u3044\u3066\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\uc545\ucde8 \ub54c\ubb38\uc5d0 \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='La puanteur peut effrayer\\nl\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='L\u00e4sst den Gegner durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='Es posible que el rival retroceda\\npor el mal olor.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='A volte il cattivo odore\\nfa tentennare i nemici.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', 
language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\u81ed\u304f\u3066\\u3000\u76f8\u624b\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\u304f\u3055\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u306f\u306a\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u3053\u3046\u3052\u304d\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u3042\u3044\u3066\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\uc545\ucde8\ub97c \ud48d\uaca8\uc11c\\n\uacf5\uaca9\ud588\uc744 \ub54c \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u767c\u51fa\u81ed\u6c23\uff0c\\n\u5728\u653b\u64ca\u7684\u6642\u5019\uff0c\\n\u6709\u6642\u6703\u4f7f\u5c0d\u624b\u754f\u7e2e\u3002', language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='Le Pok\u00e9mon \u00e9met une odeur si naus\u00e9abonde\\nqu\u2019il peut effrayer sa cible.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='L\u00e4sst das Ziel beim Angriff eventuell durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', 
url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='Debido al mal olor que emana, al atacar al rival puede\\nhacerlo retroceder.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='A volte il cattivo odore emesso dal Pok\u00e9mon\\nfa tentennare i nemici quando attacca.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='By releasing stench when attacking, this Pok\u00e9mon\\nmay cause the target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u81ed\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u653e\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u653b\u6483\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u76f8\u624b\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u901a\u8fc7\u91ca\u653e\u81ed\u81ed\u7684\u6c14\u5473\uff0c\\n\u5728\u653b\u51fb\u7684\u65f6\u5019\uff0c\\n\u6709\u65f6\u4f1a\u4f7f\u5bf9\u624b\u754f\u7f29\u3002', language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u304f\u3055\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u306f\u306a\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u3053\u3046\u3052\u304d\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u3042\u3044\u3066\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', 
language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\uc545\ucde8\ub97c \ud48d\uaca8\uc11c\\n\uacf5\uaca9\ud588\uc744 \ub54c \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u767c\u51fa\u81ed\u6c23\uff0c\\n\u5728\u653b\u64ca\u7684\u6642\u5019\uff0c\\n\u6709\u6642\u6703\u4f7f\u5c0d\u624b\u754f\u7e2e\u3002', language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='Le Pok\u00e9mon \u00e9met une odeur si naus\u00e9abonde\\nqu\u2019il peut effrayer sa cible.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='L\u00e4sst das Ziel beim Angriff eventuell durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='Debido al mal olor que emana, al atacar al rival puede\\nhacerlo retroceder.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='A volte il cattivo odore emesso dal Pok\u00e9mon\\nfa tentennare i nemici quando attacca.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='By releasing stench when attacking, this 
Pok\u00e9mon\\nmay cause the target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u81ed\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u653e\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u653b\u6483\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u76f8\u624b\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u901a\u8fc7\u91ca\u653e\u81ed\u81ed\u7684\u6c14\u5473\uff0c\\n\u5728\u653b\u51fb\u7684\u65f6\u5019\uff0c\\n\u6709\u65f6\u4f1a\u4f7f\u5bf9\u624b\u754f\u7f29\u3002', language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/'))]" + ] + }, + { + "fieldPath": "generation", + "uniqueCount": 1, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "sampleValues": [ + "Row(name='generation-iii', url='https://pokeapi.co/api/v2/generation/3/')" + ] + }, + { + "fieldPath": "id", + "uniqueCount": 1, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "1", + "frequency": 1 + } + ], + "sampleValues": [ + "1" + ] + }, + { + "fieldPath": "is_main_series", + "uniqueCount": 1, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "sampleValues": [ + "True" + ] + }, + { + "fieldPath": "name", + "uniqueCount": 1, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "stench", + "frequency": 1 + } + ], + "sampleValues": [ + "stench" + ] + }, + { + "fieldPath": "names", + "uniqueCount": 1, + "uniqueProportion": 1.0, + "nullCount": 0, + 
"nullProportion": 0.0, + "sampleValues": [ + "[Row(language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), name='\u3042\u304f\u3057\u3085\u3046'), Row(language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), name='\uc545\ucde8'), Row(language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), name='\u60e1\u81ed'), Row(language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), name='Puanteur'), Row(language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), name='Duftnote'), Row(language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), name='Hedor'), Row(language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), name='Tanfo'), Row(language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), name='Stench'), Row(language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), name='\u3042\u304f\u3057\u3085\u3046'), Row(language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), name='\u6076\u81ed')]" + ] + }, + { + "fieldPath": "pokemon", + "uniqueCount": 1, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "sampleValues": [ + "[Row(is_hidden=True, pokemon=Row(name='gloom', url='https://pokeapi.co/api/v2/pokemon/44/'), slot=3), Row(is_hidden=False, pokemon=Row(name='grimer', url='https://pokeapi.co/api/v2/pokemon/88/'), slot=1), Row(is_hidden=False, pokemon=Row(name='muk', url='https://pokeapi.co/api/v2/pokemon/89/'), slot=1), Row(is_hidden=False, pokemon=Row(name='stunky', url='https://pokeapi.co/api/v2/pokemon/434/'), slot=1), Row(is_hidden=False, pokemon=Row(name='skuntank', url='https://pokeapi.co/api/v2/pokemon/435/'), slot=1), Row(is_hidden=False, pokemon=Row(name='trubbish', url='https://pokeapi.co/api/v2/pokemon/568/'), slot=1), Row(is_hidden=False, pokemon=Row(name='garbodor', url='https://pokeapi.co/api/v2/pokemon/569/'), slot=1), Row(is_hidden=False, pokemon=Row(name='garbodor-gmax', 
url='https://pokeapi.co/api/v2/pokemon/10198/'), slot=1)]" + ] + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + }, + { + "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + }, + { + "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" + }, + { + "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" + }, + { + "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" + }, + { + "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" + }, + { + "id": "urn:li:container:98a716614da5246426edd48260406364", + "urn": "urn:li:container:98a716614da5246426edd48260406364" + }, + { + "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", + "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": 
"status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json new file mode 100644 index 00000000000000..80f584788fdb26 --- /dev/null +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json @@ -0,0 +1,2572 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + 
"customProperties": { + "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv/part3.csv", + "number_of_files": "3", + "size_in_bytes": "3539" + }, + "name": "folder_aaa.food_csv", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "folder_aaa.food_csv", + "platform": "urn:li:dataPlatform:file", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "color", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "height", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "weight", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": 
"folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586833420000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "file", + "env": "UAT", + "folder_abs_path": "tests" + }, + "name": "tests" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "file", + "env": "UAT", + "folder_abs_path": "tests/integration" + }, + "name": "integration" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": 
"folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "file", + "env": "UAT", + "folder_abs_path": "tests/integration/s3" + }, + "name": "s3" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": 
"urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + }, + { + "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "urn": 
"urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "file", + "env": "UAT", + "folder_abs_path": "tests/integration/s3/test_data" + }, + "name": "test_data" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": 
"urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + }, + { + "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + }, + { + "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "file", + "env": "UAT", + "folder_abs_path": "tests/integration/s3/test_data/local_system" + }, + "name": "local_system" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": 
"folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + }, + { + "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + }, + { + "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" + }, + { + "id": 
"urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "file", + "env": "UAT", + "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a" + }, + "name": "folder_a" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + 
"lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:93525defb812252106d3b0c08a55e39a" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + }, + { + "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + }, + { + "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" + }, + { + "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" + }, + { + "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "file", + "env": "UAT", + "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a/folder_aa" + }, + "name": "folder_aa" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": 
"folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + 
"urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + }, + { + "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + }, + { + "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" + }, + { + "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" + }, + { + "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" + }, + { + "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "file", + "env": "UAT", + "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa" + }, + "name": "folder_aaa" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", + "changeType": "UPSERT", + "aspectName": 
"dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:98a716614da5246426edd48260406364" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + }, + { + "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + }, + { + "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" + }, + { + "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" + }, + { + "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" + }, + { + "id": 
"urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" + }, + { + "id": "urn:li:container:98a716614da5246426edd48260406364", + "urn": "urn:li:container:98a716614da5246426edd48260406364" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "datasetProfile", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "rowCount": 4, + "columnCount": 4, + "fieldProfiles": [ + { + "fieldPath": "name", + "uniqueCount": 4, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "apple", + "frequency": 1 + }, + { + "value": "cookie", + "frequency": 1 + }, + { + "value": "lasagna", + "frequency": 1 + }, + { + "value": "pasta", + "frequency": 1 + } + ], + "sampleValues": [ + "apple", + "cookie", + "lasagna", + "pasta" + ] + }, + { + "fieldPath": "weight", + "uniqueCount": 4, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "23", + "frequency": 1 + }, + 
{ + "value": "49", + "frequency": 1 + }, + { + "value": "50", + "frequency": 1 + }, + { + "value": "72", + "frequency": 1 + } + ], + "sampleValues": [ + "23", + "49", + "50", + "72" + ] + }, + { + "fieldPath": "height", + "uniqueCount": 1, + "uniqueProportion": 0.25, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "4", + "frequency": 4 + } + ], + "sampleValues": [ + "4", + "4", + "4", + "4" + ] + }, + { + "fieldPath": "color", + "uniqueCount": 3, + "uniqueProportion": 0.75, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "brown", + "frequency": 1 + }, + { + "value": "red", + "frequency": 2 + }, + { + "value": "yellow", + "frequency": 1 + } + ], + "sampleValues": [ + "brown", + "red", + "red", + "yellow" + ] + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + }, + { + "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + }, + { + "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" + }, + { + "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" + }, + { + "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" + }, + { + "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "urn": 
"urn:li:container:48a8653fc4afb55b12cd8d0280e09156" + }, + { + "id": "urn:li:container:98a716614da5246426edd48260406364", + "urn": "urn:li:container:98a716614da5246426edd48260406364" + }, + { + "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", + "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet/part2.parquet", + "number_of_files": "2", + "size_in_bytes": "8412" + }, + "name": "folder_aaa.food_parquet", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "folder_aaa.food_parquet", + "platform": "urn:li:dataPlatform:file", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "color", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + 
"nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "healthy", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BooleanType": {} + } + }, + "nativeDataType": "bool", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "height", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int64", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "weight", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int64", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586833440000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": 
"urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "datasetProfile", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "rowCount": 70, + "columnCount": 5, + "fieldProfiles": [ + { + "fieldPath": "name", + "uniqueCount": 9, + "uniqueProportion": 0.13043478260869565, + "nullCount": 1, + "nullProportion": 0.014285714285714285, + "distinctValueFrequencies": [ + { + "value": "NullValue", + "frequency": 1 + }, + { + "value": "apple", + "frequency": 7 + }, + { + "value": "chicken", + "frequency": 7 + }, + { + "value": "cookie", + "frequency": 6 + }, + { + "value": "hamburger", + "frequency": 7 + }, + { + "value": "lasagna", + "frequency": 7 + }, + { + "value": "orange", + "frequency": 7 + }, + { + "value": "pasta", + "frequency": 7 + }, + { + "value": "spinach", + "frequency": 7 + }, + { + "value": "sushi", + "frequency": 7 + }, + { + "value": "water", + "frequency": 7 + } + ], + "sampleValues": [ + "apple", + "apple", + "apple", + "chicken", + "cookie", + "cookie", + "cookie", + "lasagna", + "lasagna", + "orange", + "orange", + "pasta", + "pasta", + "pasta", + "pasta", + "spinach", + "spinach", + "spinach", + "water", + "water" + ] + }, + { + "fieldPath": "weight", + "uniqueCount": 10, + "uniqueProportion": 0.14285714285714285, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "10", + "frequency": 7 + }, + { + "value": "2", + "frequency": 7 + }, + { + "value": "23", + "frequency": 7 + }, + { + "value": "32", + "frequency": 7 + }, + { 
+ "value": "36", + "frequency": 7 + }, + { + "value": "43", + "frequency": 7 + }, + { + "value": "49", + "frequency": 7 + }, + { + "value": "50", + "frequency": 7 + }, + { + "value": "53", + "frequency": 7 + }, + { + "value": "72", + "frequency": 7 + } + ], + "sampleValues": [ + "10", + "10", + "10", + "23", + "23", + "23", + "32", + "32", + "36", + "43", + "43", + "49", + "49", + "50", + "50", + "50", + "72", + "72", + "72", + "72" + ] + }, + { + "fieldPath": "height", + "uniqueCount": 4, + "uniqueProportion": 0.05714285714285714, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "4", + "frequency": 24 + }, + { + "value": "5", + "frequency": 15 + }, + { + "value": "6", + "frequency": 23 + }, + { + "value": "7", + "frequency": 8 + } + ], + "sampleValues": [ + "4", + "4", + "4", + "4", + "4", + "4", + "4", + "5", + "5", + "5", + "5", + "5", + "6", + "6", + "6", + "6", + "6", + "6", + "7", + "7" + ] + }, + { + "fieldPath": "color", + "uniqueCount": 7, + "uniqueProportion": 0.1, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "blue", + "frequency": 7 + }, + { + "value": "brown", + "frequency": 14 + }, + { + "value": "green", + "frequency": 7 + }, + { + "value": "orange", + "frequency": 14 + }, + { + "value": "red", + "frequency": 14 + }, + { + "value": "white", + "frequency": 7 + }, + { + "value": "yellow", + "frequency": 7 + } + ], + "sampleValues": [ + "blue", + "blue", + "brown", + "brown", + "brown", + "green", + "green", + "green", + "orange", + "orange", + "red", + "red", + "red", + "red", + "red", + "white", + "yellow", + "yellow", + "yellow", + "yellow" + ] + }, + { + "fieldPath": "healthy", + "uniqueCount": 2, + "uniqueProportion": 0.028985507246376812, + "nullCount": 1, + "nullProportion": 0.014285714285714285, + "sampleValues": [ + "False", + "False", + "False", + "False", + "False", + "False", + "False", + "False", + "False", + "None", + "True", + "True", + "True", + "True", 
+ "True", + "True", + "True", + "True", + "True", + "True" + ] + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + }, + { + "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + }, + { + "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" + }, + { + "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" + }, + { + "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" + }, + { + "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" + }, + { + "id": "urn:li:container:98a716614da5246426edd48260406364", + "urn": "urn:li:container:98a716614da5246426edd48260406364" + }, + { + "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", + "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": 
"datasetProperties", + "aspect": { + "json": { + "customProperties": { + "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json", + "number_of_files": "13", + "size_in_bytes": "188600" + }, + "name": "folder_aaa.pokemon_abilities_json", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "folder_aaa.pokemon_abilities_json", + "platform": "urn:li:dataPlatform:file", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "effect_changes", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.effect", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language", + "nullable": false, + "type": { + "type": { + 
"com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language.is_native", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.version_group", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.version_group.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.version_group.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.effect", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.language", + "nullable": false, + 
"type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.language.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.short_effect", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.flavor_text", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.language", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.language.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.version_group", + "nullable": false, + 
"type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.version_group.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.version_group.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "generation", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "generation.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "generation.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "id", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "is_main_series", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BooleanType": {} + } + }, + "nativeDataType": "bool", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, 
+ "isPartOfKey": false + }, + { + "fieldPath": "names.language", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names.language.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.is_hidden", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BooleanType": {} + } + }, + "nativeDataType": "bool", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.pokemon", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.pokemon.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.pokemon.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.slot", + "nullable": false, + "type": { + "type": { + 
"com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586833590000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "datasetProfile", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "rowCount": 1, + "columnCount": 9, + "fieldProfiles": [ + { + "fieldPath": "effect_changes", + "uniqueCount": 1, + 
"uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "sampleValues": [ + "[Row(effect_entries=[Row(effect='Hat im Kampf keinen Effekt.', language=Row(is_native='no', name='de', url='https://pokeapi.co/api/v2/language/6/')), Row(effect='Has no effect in battle.', language=Row(is_native='yes', name='en', url='https://pokeapi.co/api/v2/language/9/'))], version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/'))]" + ] + }, + { + "fieldPath": "effect_entries", + "uniqueCount": 1, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "sampleValues": [ + "[Row(effect='Attacken die Schaden verursachen haben mit jedem Treffer eine 10% Chance das Ziel zur\u00fcckschrecken zu lassen, wenn die Attacke dies nicht bereits als Nebeneffekt hat.\\n\\nDer Effekt stapelt nicht mit dem von getragenen Items.\\n\\nAu\u00dferhalb vom Kampf: Wenn ein Pok\u00e9mon mit dieser F\u00e4higkeit an erster Stelle im Team steht, tauchen wilde Pok\u00e9mon nur halb so oft auf.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), short_effect='Mit jedem Treffer besteht eine 10% Chance das Ziel zur\u00fcckschrecken zu lassen.'), Row(effect=\"This Pok\u00e9mon's damaging moves have a 10% chance to make the target flinch with each hit if they do not already cause flinching as a secondary effect.\\n\\nThis ability does not stack with a held item.\\n\\nOverworld: The wild encounter rate is halved while this Pok\u00e9mon is first in the party.\", language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), short_effect='Has a 10% chance of making target Pok\u00e9mon flinch with each hit.')]" + ] + }, + { + "fieldPath": "flavor_text_entries", + "uniqueCount": 1, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "sampleValues": [ + "[Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='ruby-sapphire', 
url='https://pokeapi.co/api/v2/version-group/5/')), Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='emerald', url='https://pokeapi.co/api/v2/version-group/6/')), Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='firered-leafgreen', url='https://pokeapi.co/api/v2/version-group/7/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='diamond-pearl', url='https://pokeapi.co/api/v2/version-group/8/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='platinum', url='https://pokeapi.co/api/v2/version-group/9/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='heartgold-soulsilver', url='https://pokeapi.co/api/v2/version-group/10/')), Row(flavor_text='La puanteur peut\\neffrayer l\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='black-2-white-2', url='https://pokeapi.co/api/v2/version-group/14/')), Row(flavor_text='\u304f\u3055\u304f\u3066\\u3000\u3042\u3044\u3066\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', 
language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\uc545\ucde8 \ub54c\ubb38\uc5d0 \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='La puanteur peut effrayer\\nl\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='L\u00e4sst den Gegner durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='Es posible que el rival retroceda\\npor el mal olor.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='A volte il cattivo odore\\nfa tentennare i nemici.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\u81ed\u304f\u3066\\u3000\u76f8\u624b\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\u304f\u3055\u304f\u3066\\u3000\u3042\u3044\u3066\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja-Hrkt', 
url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\uc545\ucde8 \ub54c\ubb38\uc5d0 \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='La puanteur peut effrayer\\nl\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='L\u00e4sst den Gegner durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='Es posible que el rival retroceda\\npor el mal olor.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='A volte il cattivo odore\\nfa tentennare i nemici.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\u81ed\u304f\u3066\\u3000\u76f8\u624b\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), 
Row(flavor_text='\u304f\u3055\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u306f\u306a\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u3053\u3046\u3052\u304d\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u3042\u3044\u3066\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\uc545\ucde8\ub97c \ud48d\uaca8\uc11c\\n\uacf5\uaca9\ud588\uc744 \ub54c \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u767c\u51fa\u81ed\u6c23\uff0c\\n\u5728\u653b\u64ca\u7684\u6642\u5019\uff0c\\n\u6709\u6642\u6703\u4f7f\u5c0d\u624b\u754f\u7e2e\u3002', language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='Le Pok\u00e9mon \u00e9met une odeur si naus\u00e9abonde\\nqu\u2019il peut effrayer sa cible.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='L\u00e4sst das Ziel beim Angriff eventuell durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='Debido al mal olor que emana, al atacar al rival puede\\nhacerlo retroceder.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='A volte il cattivo odore emesso dal Pok\u00e9mon\\nfa tentennare i nemici quando attacca.', 
language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='By releasing stench when attacking, this Pok\u00e9mon\\nmay cause the target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u81ed\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u653e\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u653b\u6483\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u76f8\u624b\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u901a\u8fc7\u91ca\u653e\u81ed\u81ed\u7684\u6c14\u5473\uff0c\\n\u5728\u653b\u51fb\u7684\u65f6\u5019\uff0c\\n\u6709\u65f6\u4f1a\u4f7f\u5bf9\u624b\u754f\u7f29\u3002', language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u304f\u3055\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u306f\u306a\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u3053\u3046\u3052\u304d\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u3042\u3044\u3066\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\uc545\ucde8\ub97c \ud48d\uaca8\uc11c\\n\uacf5\uaca9\ud588\uc744 \ub54c \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='ultra-sun-ultra-moon', 
url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u767c\u51fa\u81ed\u6c23\uff0c\\n\u5728\u653b\u64ca\u7684\u6642\u5019\uff0c\\n\u6709\u6642\u6703\u4f7f\u5c0d\u624b\u754f\u7e2e\u3002', language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='Le Pok\u00e9mon \u00e9met une odeur si naus\u00e9abonde\\nqu\u2019il peut effrayer sa cible.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='L\u00e4sst das Ziel beim Angriff eventuell durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='Debido al mal olor que emana, al atacar al rival puede\\nhacerlo retroceder.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='A volte il cattivo odore emesso dal Pok\u00e9mon\\nfa tentennare i nemici quando attacca.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='By releasing stench when attacking, this Pok\u00e9mon\\nmay cause the target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), 
Row(flavor_text='\u81ed\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u653e\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u653b\u6483\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u76f8\u624b\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u901a\u8fc7\u91ca\u653e\u81ed\u81ed\u7684\u6c14\u5473\uff0c\\n\u5728\u653b\u51fb\u7684\u65f6\u5019\uff0c\\n\u6709\u65f6\u4f1a\u4f7f\u5bf9\u624b\u754f\u7f29\u3002', language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/'))]" + ] + }, + { + "fieldPath": "generation", + "uniqueCount": 1, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "sampleValues": [ + "Row(name='generation-iii', url='https://pokeapi.co/api/v2/generation/3/')" + ] + }, + { + "fieldPath": "id", + "uniqueCount": 1, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "1", + "frequency": 1 + } + ], + "sampleValues": [ + "1" + ] + }, + { + "fieldPath": "is_main_series", + "uniqueCount": 1, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "sampleValues": [ + "True" + ] + }, + { + "fieldPath": "name", + "uniqueCount": 1, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "stench", + "frequency": 1 + } + ], + "sampleValues": [ + "stench" + ] + }, + { + "fieldPath": "names", + "uniqueCount": 1, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "sampleValues": [ + "[Row(language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), name='\u3042\u304f\u3057\u3085\u3046'), Row(language=Row(name='ko', 
url='https://pokeapi.co/api/v2/language/3/'), name='\uc545\ucde8'), Row(language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), name='\u60e1\u81ed'), Row(language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), name='Puanteur'), Row(language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), name='Duftnote'), Row(language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), name='Hedor'), Row(language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), name='Tanfo'), Row(language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), name='Stench'), Row(language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), name='\u3042\u304f\u3057\u3085\u3046'), Row(language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), name='\u6076\u81ed')]" + ] + }, + { + "fieldPath": "pokemon", + "uniqueCount": 1, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "sampleValues": [ + "[Row(is_hidden=True, pokemon=Row(name='gloom', url='https://pokeapi.co/api/v2/pokemon/44/'), slot=3), Row(is_hidden=False, pokemon=Row(name='grimer', url='https://pokeapi.co/api/v2/pokemon/88/'), slot=1), Row(is_hidden=False, pokemon=Row(name='muk', url='https://pokeapi.co/api/v2/pokemon/89/'), slot=1), Row(is_hidden=False, pokemon=Row(name='stunky', url='https://pokeapi.co/api/v2/pokemon/434/'), slot=1), Row(is_hidden=False, pokemon=Row(name='skuntank', url='https://pokeapi.co/api/v2/pokemon/435/'), slot=1), Row(is_hidden=False, pokemon=Row(name='trubbish', url='https://pokeapi.co/api/v2/pokemon/568/'), slot=1), Row(is_hidden=False, pokemon=Row(name='garbodor', url='https://pokeapi.co/api/v2/pokemon/569/'), slot=1), Row(is_hidden=False, pokemon=Row(name='garbodor-gmax', url='https://pokeapi.co/api/v2/pokemon/10198/'), slot=1)]" + ] + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + }, + { + "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + }, + { + "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" + }, + { + "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" + }, + { + "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" + }, + { + "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" + }, + { + "id": "urn:li:container:98a716614da5246426edd48260406364", + "urn": "urn:li:container:98a716614da5246426edd48260406364" + }, + { + "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", + "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" 
+ } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json new file mode 100644 index 00000000000000..a384a8f1e501de --- /dev/null +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json @@ -0,0 +1,2572 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv/part3.csv", + "number_of_files": "3", + "size_in_bytes": "3539" + }, + "name": "folder_aaa.food_csv", + 
"description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "folder_aaa.food_csv", + "platform": "urn:li:dataPlatform:file", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "color", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "height", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "weight", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586833420000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "file", + "env": "UAT", + "folder_abs_path": "tests" + }, + "name": "tests" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + 
"changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "file", + "env": "UAT", + "folder_abs_path": "tests/integration" + }, + "name": "integration" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ 
+ "entityType": "container", + "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "file", + "env": "UAT", + "folder_abs_path": "tests/integration/s3" + }, + "name": "s3" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "changeType": "UPSERT", + "aspectName": 
"status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + }, + { + "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + } + ] + } + }, + "systemMetadata": { + 
"lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "file", + "env": "UAT", + "folder_abs_path": "tests/integration/s3/test_data" + }, + "name": "test_data" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "changeType": 
"UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + }, + { + "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + }, + { + "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "file", + "env": "UAT", + "folder_abs_path": "tests/integration/s3/test_data/local_system" + }, + "name": "local_system" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + }, + { + "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + }, + { + "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" + }, + { + "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" 
+ } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "file", + "env": "UAT", + "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a" + }, + "name": "folder_a" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": 
"urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:93525defb812252106d3b0c08a55e39a" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + }, + { + "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + }, + { + "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" + }, + { + "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" + }, + { + "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "file", + "env": "UAT", + "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a/folder_aa" + }, + "name": "folder_aa" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + 
"entityType": "container", + "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + }, + { + "id": 
"urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + }, + { + "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" + }, + { + "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" + }, + { + "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" + }, + { + "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "file", + "env": "UAT", + "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa" + }, + "name": "folder_aaa" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": 
"urn:li:dataPlatform:file" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:98a716614da5246426edd48260406364" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + }, + { + "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + }, + { + "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" + }, + { + "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" + }, + { + "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" + }, + { + "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "urn": 
"urn:li:container:48a8653fc4afb55b12cd8d0280e09156" + }, + { + "id": "urn:li:container:98a716614da5246426edd48260406364", + "urn": "urn:li:container:98a716614da5246426edd48260406364" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "datasetProfile", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "rowCount": 4, + "columnCount": 4, + "fieldProfiles": [ + { + "fieldPath": "name", + "uniqueCount": 4, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "apple", + "frequency": 1 + }, + { + "value": "cookie", + "frequency": 1 + }, + { + "value": "lasagna", + "frequency": 1 + }, + { + "value": "pasta", + "frequency": 1 + } + ], + "sampleValues": [ + "apple", + "cookie", + "lasagna", + "pasta" + ] + }, + { + "fieldPath": "weight", + "uniqueCount": 4, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "23", + "frequency": 1 + }, + { + "value": "49", + "frequency": 1 + }, + { + 
"value": "50", + "frequency": 1 + }, + { + "value": "72", + "frequency": 1 + } + ], + "sampleValues": [ + "23", + "49", + "50", + "72" + ] + }, + { + "fieldPath": "height", + "uniqueCount": 1, + "uniqueProportion": 0.25, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "4", + "frequency": 4 + } + ], + "sampleValues": [ + "4", + "4", + "4", + "4" + ] + }, + { + "fieldPath": "color", + "uniqueCount": 3, + "uniqueProportion": 0.75, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "brown", + "frequency": 1 + }, + { + "value": "red", + "frequency": 2 + }, + { + "value": "yellow", + "frequency": 1 + } + ], + "sampleValues": [ + "brown", + "red", + "red", + "yellow" + ] + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + }, + { + "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + }, + { + "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" + }, + { + "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" + }, + { + "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" + }, + { + "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "urn": 
"urn:li:container:48a8653fc4afb55b12cd8d0280e09156" + }, + { + "id": "urn:li:container:98a716614da5246426edd48260406364", + "urn": "urn:li:container:98a716614da5246426edd48260406364" + }, + { + "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", + "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet/part2.parquet", + "number_of_files": "2", + "size_in_bytes": "8412" + }, + "name": "folder_aaa.food_parquet", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "folder_aaa.food_parquet", + "platform": "urn:li:dataPlatform:file", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "color", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + 
"nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "healthy", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BooleanType": {} + } + }, + "nativeDataType": "bool", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "height", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int64", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "weight", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int64", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586833440000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": 
"urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "datasetProfile", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "rowCount": 70, + "columnCount": 5, + "fieldProfiles": [ + { + "fieldPath": "name", + "uniqueCount": 9, + "uniqueProportion": 0.13043478260869565, + "nullCount": 1, + "nullProportion": 0.014285714285714285, + "distinctValueFrequencies": [ + { + "value": "NullValue", + "frequency": 1 + }, + { + "value": "apple", + "frequency": 7 + }, + { + "value": "chicken", + "frequency": 7 + }, + { + "value": "cookie", + "frequency": 6 + }, + { + "value": "hamburger", + "frequency": 7 + }, + { + "value": "lasagna", + "frequency": 7 + }, + { + "value": "orange", + "frequency": 7 + }, + { + "value": "pasta", + "frequency": 7 + }, + { + "value": "spinach", + "frequency": 7 + }, + { + "value": "sushi", + "frequency": 7 + }, + { + "value": "water", + "frequency": 7 + } + ], + "sampleValues": [ + "apple", + "apple", + "apple", + "chicken", + "cookie", + "cookie", + "cookie", + "lasagna", + "lasagna", + "orange", + "orange", + "pasta", + "pasta", + "pasta", + "pasta", + "spinach", + "spinach", + "spinach", + "water", + "water" + ] + }, + { + "fieldPath": "weight", + "uniqueCount": 10, + "uniqueProportion": 0.14285714285714285, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "10", + "frequency": 7 + }, + { + "value": "2", + "frequency": 7 + }, + { + "value": "23", + "frequency": 7 + }, + { + "value": "32", + "frequency": 7 + }, 
+ { + "value": "36", + "frequency": 7 + }, + { + "value": "43", + "frequency": 7 + }, + { + "value": "49", + "frequency": 7 + }, + { + "value": "50", + "frequency": 7 + }, + { + "value": "53", + "frequency": 7 + }, + { + "value": "72", + "frequency": 7 + } + ], + "sampleValues": [ + "10", + "10", + "10", + "23", + "23", + "23", + "32", + "32", + "36", + "43", + "43", + "49", + "49", + "50", + "50", + "50", + "72", + "72", + "72", + "72" + ] + }, + { + "fieldPath": "height", + "uniqueCount": 4, + "uniqueProportion": 0.05714285714285714, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "4", + "frequency": 24 + }, + { + "value": "5", + "frequency": 15 + }, + { + "value": "6", + "frequency": 23 + }, + { + "value": "7", + "frequency": 8 + } + ], + "sampleValues": [ + "4", + "4", + "4", + "4", + "4", + "4", + "4", + "5", + "5", + "5", + "5", + "5", + "6", + "6", + "6", + "6", + "6", + "6", + "7", + "7" + ] + }, + { + "fieldPath": "color", + "uniqueCount": 7, + "uniqueProportion": 0.1, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "blue", + "frequency": 7 + }, + { + "value": "brown", + "frequency": 14 + }, + { + "value": "green", + "frequency": 7 + }, + { + "value": "orange", + "frequency": 14 + }, + { + "value": "red", + "frequency": 14 + }, + { + "value": "white", + "frequency": 7 + }, + { + "value": "yellow", + "frequency": 7 + } + ], + "sampleValues": [ + "blue", + "blue", + "brown", + "brown", + "brown", + "green", + "green", + "green", + "orange", + "orange", + "red", + "red", + "red", + "red", + "red", + "white", + "yellow", + "yellow", + "yellow", + "yellow" + ] + }, + { + "fieldPath": "healthy", + "uniqueCount": 2, + "uniqueProportion": 0.028985507246376812, + "nullCount": 1, + "nullProportion": 0.014285714285714285, + "sampleValues": [ + "False", + "False", + "False", + "False", + "False", + "False", + "False", + "False", + "False", + "None", + "True", + "True", + "True", + 
"True", + "True", + "True", + "True", + "True", + "True", + "True" + ] + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + }, + { + "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + }, + { + "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" + }, + { + "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" + }, + { + "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" + }, + { + "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" + }, + { + "id": "urn:li:container:98a716614da5246426edd48260406364", + "urn": "urn:li:container:98a716614da5246426edd48260406364" + }, + { + "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", + "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", 
+ "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json", + "number_of_files": "13", + "size_in_bytes": "188600" + }, + "name": "folder_aaa.pokemon_abilities_json", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "folder_aaa.pokemon_abilities_json", + "platform": "urn:li:dataPlatform:file", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "effect_changes", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.effect", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language", + "nullable": false, + "type": { + "type": { + 
"com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language.is_native", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.version_group", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.version_group.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.version_group.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.effect", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.language", + "nullable": false, + 
"type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.language.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.short_effect", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.flavor_text", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.language", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.language.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.version_group", + "nullable": false, + 
"type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.version_group.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.version_group.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "generation", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "generation.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "generation.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "id", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "is_main_series", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BooleanType": {} + } + }, + "nativeDataType": "bool", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, 
+ "isPartOfKey": false + }, + { + "fieldPath": "names.language", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names.language.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.is_hidden", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BooleanType": {} + } + }, + "nativeDataType": "bool", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.pokemon", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.pokemon.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.pokemon.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.slot", + "nullable": false, + "type": { + "type": { + 
"com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586833590000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "datasetProfile", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "rowCount": 1, + "columnCount": 9, + "fieldProfiles": [ + { + "fieldPath": "effect_changes", + 
"uniqueCount": 1, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "sampleValues": [ + "[Row(effect_entries=[Row(effect='Hat im Kampf keinen Effekt.', language=Row(is_native='no', name='de', url='https://pokeapi.co/api/v2/language/6/')), Row(effect='Has no effect in battle.', language=Row(is_native='yes', name='en', url='https://pokeapi.co/api/v2/language/9/'))], version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/'))]" + ] + }, + { + "fieldPath": "effect_entries", + "uniqueCount": 1, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "sampleValues": [ + "[Row(effect='Attacken die Schaden verursachen haben mit jedem Treffer eine 10% Chance das Ziel zur\u00fcckschrecken zu lassen, wenn die Attacke dies nicht bereits als Nebeneffekt hat.\\n\\nDer Effekt stapelt nicht mit dem von getragenen Items.\\n\\nAu\u00dferhalb vom Kampf: Wenn ein Pok\u00e9mon mit dieser F\u00e4higkeit an erster Stelle im Team steht, tauchen wilde Pok\u00e9mon nur halb so oft auf.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), short_effect='Mit jedem Treffer besteht eine 10% Chance das Ziel zur\u00fcckschrecken zu lassen.'), Row(effect=\"This Pok\u00e9mon's damaging moves have a 10% chance to make the target flinch with each hit if they do not already cause flinching as a secondary effect.\\n\\nThis ability does not stack with a held item.\\n\\nOverworld: The wild encounter rate is halved while this Pok\u00e9mon is first in the party.\", language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), short_effect='Has a 10% chance of making target Pok\u00e9mon flinch with each hit.')]" + ] + }, + { + "fieldPath": "flavor_text_entries", + "uniqueCount": 1, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "sampleValues": [ + "[Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), 
version_group=Row(name='ruby-sapphire', url='https://pokeapi.co/api/v2/version-group/5/')), Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='emerald', url='https://pokeapi.co/api/v2/version-group/6/')), Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='firered-leafgreen', url='https://pokeapi.co/api/v2/version-group/7/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='diamond-pearl', url='https://pokeapi.co/api/v2/version-group/8/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='platinum', url='https://pokeapi.co/api/v2/version-group/9/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='heartgold-soulsilver', url='https://pokeapi.co/api/v2/version-group/10/')), Row(flavor_text='La puanteur peut\\neffrayer l\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='black-2-white-2', url='https://pokeapi.co/api/v2/version-group/14/')), 
Row(flavor_text='\u304f\u3055\u304f\u3066\\u3000\u3042\u3044\u3066\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\uc545\ucde8 \ub54c\ubb38\uc5d0 \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='La puanteur peut effrayer\\nl\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='L\u00e4sst den Gegner durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='Es posible que el rival retroceda\\npor el mal olor.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='A volte il cattivo odore\\nfa tentennare i nemici.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\u81ed\u304f\u3066\\u3000\u76f8\u624b\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), 
Row(flavor_text='\u304f\u3055\u304f\u3066\\u3000\u3042\u3044\u3066\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\uc545\ucde8 \ub54c\ubb38\uc5d0 \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='La puanteur peut effrayer\\nl\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='L\u00e4sst den Gegner durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='Es posible que el rival retroceda\\npor el mal olor.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='A volte il cattivo odore\\nfa tentennare i nemici.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\u81ed\u304f\u3066\\u3000\u76f8\u624b\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja', 
url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\u304f\u3055\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u306f\u306a\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u3053\u3046\u3052\u304d\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u3042\u3044\u3066\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\uc545\ucde8\ub97c \ud48d\uaca8\uc11c\\n\uacf5\uaca9\ud588\uc744 \ub54c \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u767c\u51fa\u81ed\u6c23\uff0c\\n\u5728\u653b\u64ca\u7684\u6642\u5019\uff0c\\n\u6709\u6642\u6703\u4f7f\u5c0d\u624b\u754f\u7e2e\u3002', language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='Le Pok\u00e9mon \u00e9met une odeur si naus\u00e9abonde\\nqu\u2019il peut effrayer sa cible.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='L\u00e4sst das Ziel beim Angriff eventuell durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='Debido al mal olor que emana, al atacar al rival puede\\nhacerlo retroceder.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='sun-moon', 
url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='A volte il cattivo odore emesso dal Pok\u00e9mon\\nfa tentennare i nemici quando attacca.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='By releasing stench when attacking, this Pok\u00e9mon\\nmay cause the target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u81ed\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u653e\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u653b\u6483\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u76f8\u624b\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u901a\u8fc7\u91ca\u653e\u81ed\u81ed\u7684\u6c14\u5473\uff0c\\n\u5728\u653b\u51fb\u7684\u65f6\u5019\uff0c\\n\u6709\u65f6\u4f1a\u4f7f\u5bf9\u624b\u754f\u7f29\u3002', language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u304f\u3055\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u306f\u306a\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u3053\u3046\u3052\u304d\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u3042\u3044\u3066\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\uc545\ucde8\ub97c \ud48d\uaca8\uc11c\\n\uacf5\uaca9\ud588\uc744 \ub54c \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', 
language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u767c\u51fa\u81ed\u6c23\uff0c\\n\u5728\u653b\u64ca\u7684\u6642\u5019\uff0c\\n\u6709\u6642\u6703\u4f7f\u5c0d\u624b\u754f\u7e2e\u3002', language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='Le Pok\u00e9mon \u00e9met une odeur si naus\u00e9abonde\\nqu\u2019il peut effrayer sa cible.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='L\u00e4sst das Ziel beim Angriff eventuell durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='Debido al mal olor que emana, al atacar al rival puede\\nhacerlo retroceder.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='A volte il cattivo odore emesso dal Pok\u00e9mon\\nfa tentennare i nemici quando attacca.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='By releasing stench when attacking, this Pok\u00e9mon\\nmay cause the target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), 
Row(flavor_text='\u81ed\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u653e\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u653b\u6483\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u76f8\u624b\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u901a\u8fc7\u91ca\u653e\u81ed\u81ed\u7684\u6c14\u5473\uff0c\\n\u5728\u653b\u51fb\u7684\u65f6\u5019\uff0c\\n\u6709\u65f6\u4f1a\u4f7f\u5bf9\u624b\u754f\u7f29\u3002', language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/'))]" + ] + }, + { + "fieldPath": "generation", + "uniqueCount": 1, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "sampleValues": [ + "Row(name='generation-iii', url='https://pokeapi.co/api/v2/generation/3/')" + ] + }, + { + "fieldPath": "id", + "uniqueCount": 1, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "1", + "frequency": 1 + } + ], + "sampleValues": [ + "1" + ] + }, + { + "fieldPath": "is_main_series", + "uniqueCount": 1, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "sampleValues": [ + "True" + ] + }, + { + "fieldPath": "name", + "uniqueCount": 1, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "distinctValueFrequencies": [ + { + "value": "stench", + "frequency": 1 + } + ], + "sampleValues": [ + "stench" + ] + }, + { + "fieldPath": "names", + "uniqueCount": 1, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "sampleValues": [ + "[Row(language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), name='\u3042\u304f\u3057\u3085\u3046'), Row(language=Row(name='ko', 
url='https://pokeapi.co/api/v2/language/3/'), name='\uc545\ucde8'), Row(language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), name='\u60e1\u81ed'), Row(language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), name='Puanteur'), Row(language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), name='Duftnote'), Row(language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), name='Hedor'), Row(language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), name='Tanfo'), Row(language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), name='Stench'), Row(language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), name='\u3042\u304f\u3057\u3085\u3046'), Row(language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), name='\u6076\u81ed')]" + ] + }, + { + "fieldPath": "pokemon", + "uniqueCount": 1, + "uniqueProportion": 1.0, + "nullCount": 0, + "nullProportion": 0.0, + "sampleValues": [ + "[Row(is_hidden=True, pokemon=Row(name='gloom', url='https://pokeapi.co/api/v2/pokemon/44/'), slot=3), Row(is_hidden=False, pokemon=Row(name='grimer', url='https://pokeapi.co/api/v2/pokemon/88/'), slot=1), Row(is_hidden=False, pokemon=Row(name='muk', url='https://pokeapi.co/api/v2/pokemon/89/'), slot=1), Row(is_hidden=False, pokemon=Row(name='stunky', url='https://pokeapi.co/api/v2/pokemon/434/'), slot=1), Row(is_hidden=False, pokemon=Row(name='skuntank', url='https://pokeapi.co/api/v2/pokemon/435/'), slot=1), Row(is_hidden=False, pokemon=Row(name='trubbish', url='https://pokeapi.co/api/v2/pokemon/568/'), slot=1), Row(is_hidden=False, pokemon=Row(name='garbodor', url='https://pokeapi.co/api/v2/pokemon/569/'), slot=1), Row(is_hidden=False, pokemon=Row(name='garbodor-gmax', url='https://pokeapi.co/api/v2/pokemon/10198/'), slot=1)]" + ] + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + 
"lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", + "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" + }, + { + "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", + "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" + }, + { + "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", + "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" + }, + { + "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", + "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" + }, + { + "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", + "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" + }, + { + "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", + "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" + }, + { + "id": "urn:li:container:98a716614da5246426edd48260406364", + "urn": "urn:li:container:98a716614da5246426edd48260406364" + }, + { + "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", + "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json index d50f00efacaa06..06678512199bf6 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json @@ -12,7 +12,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { @@ -34,7 +35,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { @@ -127,54 +129,51 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": 
"file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/no_extension/small,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "containerProperties", "aspect": { "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "customProperties": { + "platform": "s3", + "instance": "test-platform-instance", + "env": "DEV", + "folder_abs_path": "my-test-bucket/folder_a" }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586848010000 + "name": "folder_a" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", + "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "container", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "instance": "test-platform-instance", - "env": "DEV", - "bucket_name": "my-test-bucket" - }, - "name": "my-test-bucket" + "container": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", + "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -184,96 +183,103 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - 
"runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/no_extension/small,DEV)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "operation", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847850000 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", + "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "S3 bucket" + "Folder" ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "containerProperties", "aspect": { "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" - } - ] + "customProperties": { + "platform": "s3", + "instance": "test-platform-instance", + "env": "DEV", + 
"bucket_name": "my-test-bucket" + }, + "name": "my-test-bucket" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", + "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "status", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "instance": "test-platform-instance", - "env": "DEV", - "folder_abs_path": "my-test-bucket/folder_a" - }, - "name": "folder_a" + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", + "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:s3", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { @@ -289,39 +295,47 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", + "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - 
"Folder" + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + } ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", + "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "subTypes", "aspect": { "json": { - "container": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" + "typeNames": [ + "S3 bucket" + ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { @@ -345,12 +359,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", + "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { @@ -359,50 +374,52 @@ "platform": "s3", "instance": "test-platform-instance", "env": "DEV", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa" + "folder_abs_path": "my-test-bucket/folder_a/folder_aa/folder_aaa" }, - "name": "folder_aa" + "name": "folder_aaa" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", + "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", "changeType": 
"UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:b0037296cdd497e3137aa0628b8687bc" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", + "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", + "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -414,86 +431,63 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "containerProperties", "aspect": { "json": { - "container": "urn:li:container:c8d940d2010edd365619411b385b11e4" + "customProperties": { + "platform": "s3", + "instance": "test-platform-instance", + "env": "DEV", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa" + }, + "name": "folder_aa" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + 
"lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" - }, - { - "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", - "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" - }, - { - "id": "urn:li:container:c8d940d2010edd365619411b385b11e4", - "urn": "urn:li:container:c8d940d2010edd365619411b385b11e4" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "file_without_extension.json" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", - "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "status", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "instance": "test-platform-instance", - "env": "DEV", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa/folder_aaa" - }, - "name": "folder_aaa" + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", + "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:s3", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": 
"no-run-id-provided" } }, { @@ -509,12 +503,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", + "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -526,22 +521,37 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", + "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b0037296cdd497e3137aa0628b8687bc" + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + }, + { + "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", + "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" + }, + { + "id": "urn:li:container:c8d940d2010edd365619411b385b11e4", + "urn": "urn:li:container:c8d940d2010edd365619411b385b11e4" + } + ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { @@ -573,7 +583,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", + "changeType": "UPSERT", + "aspectName": "container", + 
"aspect": { + "json": { + "container": "urn:li:container:c8d940d2010edd365619411b385b11e4" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { @@ -594,38 +621,40 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:9b4624d58669059c9e62afb3d7341944", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:9b4624d58669059c9e62afb3d7341944", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { @@ -642,22 +671,25 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:9b4624d58669059c9e62afb3d7341944", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "container": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" + "platform": "urn:li:dataPlatform:s3", + "instance": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { @@ -693,7 +725,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { @@ -708,7 +741,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { @@ -723,7 +757,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } }, { @@ -763,7 +798,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json" + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition.json index 0eefc4cafbd620..8e1daed0070f10 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition.json @@ -7,7 +7,7 @@ "aspect": { "json": { "customProperties": { - "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv/part2.csv" + "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv/part3.csv" }, "name": "food_csv", "description": "", @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": 
"no-run-id-provided" } }, { @@ -56,18 +57,6 @@ "recursive": false, "isPartOfKey": false }, - { - "fieldPath": "healthy", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "boolean", - "recursive": false, - "isPartOfKey": false - }, { "fieldPath": "height", "nullable": false, @@ -109,28 +98,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847990000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -150,7 +119,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -165,22 +135,30 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "operation", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", 
+ "lastUpdatedTimestamp": 1586847820000 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -197,7 +175,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:s3" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -212,7 +207,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -232,7 +228,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -247,7 +244,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -262,39 +260,47 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "containerProperties", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa" + }, + 
"name": "folder_aa" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "subTypes", "aspect": { "json": { - "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -314,27 +320,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "container", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "UAT", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa" - }, - "name": "folder_aa" + "container": "urn:li:container:86297df39321e4948dbe8b8e941de98b" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -349,22 +352,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "container", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "container": 
"urn:li:container:050fedde7a12cb8c8447db8d298f5577" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -381,22 +386,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "container": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -420,7 +427,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -440,37 +448,40 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": 
"urn:li:dataPlatform:s3" + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -487,22 +498,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -530,7 +543,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -545,39 +559,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", - "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" - }, - { - "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", - "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" - }, - { - "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", - "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" - }, - { - "id": 
"urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", - "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" - } - ] + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -597,7 +596,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -690,7 +690,41 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + }, + { + "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + }, + { + "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + }, + { + "id": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -702,16 +736,17 @@ "json": { "timestampMillis": 1615443388097, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847810000 + "lastUpdatedTimestamp": 1586847840000 } }, "systemMetadata": { "lastObserved": 1615443388097, - 
"runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -726,7 +761,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } }, { @@ -758,37 +810,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_no_partition.json" + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_exclude.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_exclude.json index 36f40cc918735a..0d4c2d14d4ac16 100644 --- 
a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_exclude.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_exclude.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" } }, { @@ -109,28 +110,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847810000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" } }, { @@ -150,7 +131,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" } }, { @@ -165,22 +147,30 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "operation", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + 
"timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847840000 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" } }, { @@ -197,206 +187,219 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "path": [] + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "browsePathsV2", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "UAT", - "folder_abs_path": "my-test-bucket/folder_a" - }, - "name": "folder_a" + "path": [] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "containerProperties", "aspect": { "json": { - "removed": false + "customProperties": { + 
"platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa" + }, + "name": "folder_aa" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "container", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "container": "urn:li:container:86297df39321e4948dbe8b8e941de98b" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "containerProperties", "aspect": { "json": { - "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a" + }, + "name": "folder_a" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" } }, { 
"entityType": "container", "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", - "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" - } - ] + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "UAT", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa" - }, - "name": "folder_aa" + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "subTypes", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "typeNames": [ + "Folder" + 
] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "Folder" + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + } ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "container": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" } }, { @@ -420,7 +423,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" } }, { @@ -440,37 +460,40 @@ }, "systemMetadata": { 
"lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" } }, { @@ -487,22 +510,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" } }, { @@ -530,37 +555,40 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": 
"no-run-id-provided" } }, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "status", "aspect": { "json": { - "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" } }, { @@ -592,7 +620,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json" + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_filename.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_filename.json index 7a59a7f21f1180..7172bba73f01c4 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_filename.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_filename.json @@ -7,7 +7,7 @@ "aspect": { "json": { "customProperties": { - "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv/part2.csv" + "schema_inferred_from": 
"s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv/part3.csv" }, "name": "folder_aaa.food_csv", "description": "", @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -56,18 +57,6 @@ "recursive": false, "isPartOfKey": false }, - { - "fieldPath": "healthy", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "boolean", - "recursive": false, - "isPartOfKey": false - }, { "fieldPath": "height", "nullable": false, @@ -109,28 +98,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847990000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -150,7 +119,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -165,22 +135,30 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "operation", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847820000 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -197,7 +175,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:s3" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -212,7 +207,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -232,7 +228,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -247,7 +244,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -262,39 +260,47 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" 
+ "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "containerProperties", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa" + }, + "name": "folder_aa" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "subTypes", "aspect": { "json": { - "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -314,27 +320,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "container", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "UAT", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa" - }, - "name": "folder_aa" + "container": "urn:li:container:86297df39321e4948dbe8b8e941de98b" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": 
"folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -349,22 +352,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "container", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -381,22 +386,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "container": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -420,7 +427,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -440,37 +448,40 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": 
"no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -487,22 +498,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -530,7 +543,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -545,39 +559,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": 
"folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", - "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" - }, - { - "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", - "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" - }, - { - "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", - "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" - }, - { - "id": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", - "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" - } - ] + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -597,7 +596,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -690,7 +690,41 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + }, + { + "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "urn": 
"urn:li:container:86297df39321e4948dbe8b8e941de98b" + }, + { + "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + }, + { + "id": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -702,16 +736,17 @@ "json": { "timestampMillis": 1615443388097, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847810000 + "lastUpdatedTimestamp": 1586847840000 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -726,7 +761,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } }, { @@ -758,37 +810,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - 
"systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json" + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_glob.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_glob.json index 0dc71b900b03b6..0372a6afd94577 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_glob.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_glob.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" } }, { @@ -109,28 +110,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847810000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" } }, { @@ 
-150,7 +131,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" } }, { @@ -165,22 +147,30 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "operation", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847840000 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" } }, { @@ -197,206 +187,219 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "path": [] + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": 
"urn:li:container:050fedde7a12cb8c8447db8d298f5577", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "browsePathsV2", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "UAT", - "folder_abs_path": "my-test-bucket/folder_a" - }, - "name": "folder_a" + "path": [] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "containerProperties", "aspect": { "json": { - "removed": false + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa" + }, + "name": "folder_aa" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "container", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "container": "urn:li:container:86297df39321e4948dbe8b8e941de98b" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + 
"removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "containerProperties", "aspect": { "json": { - "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a" + }, + "name": "folder_a" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", - "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" - } - ] + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "UAT", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa" - }, - "name": "folder_aa" + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" } }, 
{ "entityType": "container", "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "subTypes", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "Folder" + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + } ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "container": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": 
"folder_no_partition_glob.json" + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" } }, { @@ -420,7 +423,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" } }, { @@ -440,37 +460,40 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" } }, { @@ -487,22 +510,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" + "runId": 
"folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" } }, { @@ -530,37 +555,40 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "status", "aspect": { "json": { - "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" } }, { @@ -592,7 +620,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json" + "runId": "folder_no_partition_glob.json", + "lastRunId": 
"no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_basic.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_basic.json index aa683a2bcd4c21..6ff3925b1afb34 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_basic.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_basic.json @@ -7,16 +7,24 @@ "aspect": { "json": { "customProperties": { - "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json" + "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json", + "number_of_partitions": "1" }, "name": "folder_aaa.pokemon_abilities_json", "description": "", + "created": { + "time": 1586847980000 + }, + "lastModified": { + "time": 1586847980000 + }, "tags": [] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } }, { @@ -541,48 +549,54 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "containerProperties", "aspect": { "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "customProperties": { + "platform": "s3", + "env": "UAT", + "bucket_name": 
"my-test-bucket" }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847960000 + "name": "my-test-bucket" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "partitionsSummary", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "UAT", - "bucket_name": "my-test-bucket" + "minPartition": { + "partition": "partition_0=year=2022/partition_1=month=jan", + "createdTime": 1586847980000, + "lastModifiedTime": 1586847980000 }, - "name": "my-test-bucket" + "maxPartition": { + "partition": "partition_0=year=2022/partition_1=month=jan", + "createdTime": 1586847980000, + "lastModifiedTime": 1586847980000 + } } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } }, { @@ -597,22 +611,30 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "operation", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": 
"FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847990000 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } }, { @@ -629,7 +651,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:s3" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } }, { @@ -644,12 +683,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { @@ -657,49 +697,52 @@ "customProperties": { "platform": "s3", "env": "UAT", - "folder_abs_path": "my-test-bucket/folder_a" + "folder_abs_path": "my-test-bucket/folder_a/folder_aa" }, - "name": "folder_a" + "name": "folder_aa" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": 
"urn:li:container:86297df39321e4948dbe8b8e941de98b" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -711,77 +754,61 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "containerProperties", "aspect": { "json": { - "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a" + }, + "name": "folder_a" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": 
"urn:li:container:050fedde7a12cb8c8447db8d298f5577", - "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", - "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "status", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "UAT", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa" - }, - "name": "folder_aa" + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } }, { @@ -796,12 +823,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -813,22 +841,29 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": 
"urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + } + ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } }, { @@ -852,7 +887,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } }, { @@ -872,37 +908,56 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": 
"UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } }, { @@ -919,22 +974,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } }, { @@ -962,7 +1019,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } }, { @@ -977,7 +1035,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } }, { @@ -992,7 +1051,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1024,7 +1084,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_basic.json" + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_keyval.json 
b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_keyval.json index 095c1cbe3152b9..86fa7835008dd4 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_keyval.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_keyval.json @@ -7,16 +7,24 @@ "aspect": { "json": { "customProperties": { - "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json" + "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json", + "number_of_partitions": "1" }, "name": "folder_aaa.pokemon_abilities_json", "description": "", + "created": { + "time": 1586847980000 + }, + "lastModified": { + "time": 1586847980000 + }, "tags": [] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_keyval.json" + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } }, { @@ -541,48 +549,54 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_keyval.json" + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "containerProperties", "aspect": { "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "customProperties": { + "platform": "s3", + "env": "UAT", + "bucket_name": "my-test-bucket" }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847960000 + "name": "my-test-bucket" } }, "systemMetadata": { "lastObserved": 1615443388097, 
- "runId": "folder_partition_keyval.json" + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "partitionsSummary", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "UAT", - "bucket_name": "my-test-bucket" + "minPartition": { + "partition": "year=2022/month=jan", + "createdTime": 1586847980000, + "lastModifiedTime": 1586847980000 }, - "name": "my-test-bucket" + "maxPartition": { + "partition": "year=2022/month=jan", + "createdTime": 1586847980000, + "lastModifiedTime": 1586847980000 + } } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_keyval.json" + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } }, { @@ -597,22 +611,30 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_keyval.json" + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "operation", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847990000 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_keyval.json" + "runId": 
"folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } }, { @@ -629,7 +651,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_keyval.json" + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:s3" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } }, { @@ -644,12 +683,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_keyval.json" + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { @@ -657,49 +697,52 @@ "customProperties": { "platform": "s3", "env": "UAT", - "folder_abs_path": "my-test-bucket/folder_a" + "folder_abs_path": "my-test-bucket/folder_a/folder_aa" }, - "name": "folder_a" + "name": "folder_aa" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_keyval.json" + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:86297df39321e4948dbe8b8e941de98b" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_keyval.json" + "runId": 
"folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_keyval.json" + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -711,77 +754,61 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_keyval.json" + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "containerProperties", "aspect": { "json": { - "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a" + }, + "name": "folder_a" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_keyval.json" + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", - "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - 
"runId": "folder_partition_keyval.json" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", - "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "status", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "UAT", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa" - }, - "name": "folder_aa" + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_keyval.json" + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_keyval.json" + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } }, { @@ -796,12 +823,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_keyval.json" + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -813,22 +841,29 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_keyval.json" + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": 
"browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + } + ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_keyval.json" + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } }, { @@ -852,7 +887,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_keyval.json" + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } }, { @@ -872,37 +908,56 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_keyval.json" + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_keyval.json" + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "removed": false } }, "systemMetadata": { 
"lastObserved": 1615443388097, - "runId": "folder_partition_keyval.json" + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } }, { @@ -919,22 +974,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_keyval.json" + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_keyval.json" + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } }, { @@ -962,7 +1019,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_keyval.json" + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } }, { @@ -977,7 +1035,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_keyval.json" + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } }, { @@ -992,7 +1051,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_keyval.json" + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1024,7 +1084,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_keyval.json" + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_update_schema.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_update_schema.json index a5f135c30369a3..95ec5e83f7991c 100644 --- 
a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_update_schema.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_update_schema.json @@ -7,16 +7,24 @@ "aspect": { "json": { "customProperties": { - "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json" + "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json", + "number_of_partitions": "1" }, "name": "folder_aaa.pokemon_abilities_json", "description": "", + "created": { + "time": 1586847980000 + }, + "lastModified": { + "time": 1586847980000 + }, "tags": [] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" } }, { @@ -541,48 +549,54 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "containerProperties", "aspect": { "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "customProperties": { + "platform": "s3", + "env": "UAT", + "bucket_name": "my-test-bucket" }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847960000 + "name": "my-test-bucket" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" + "runId": "folder_partition_update_schema.json", + 
"lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "partitionsSummary", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "UAT", - "bucket_name": "my-test-bucket" + "minPartition": { + "partition": "partition_0=year=2022/partition_1=month=jan", + "createdTime": 1586847980000, + "lastModifiedTime": 1586847980000 }, - "name": "my-test-bucket" + "maxPartition": { + "partition": "partition_0=year=2022/partition_1=month=jan", + "createdTime": 1586847980000, + "lastModifiedTime": 1586847980000 + } } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" } }, { @@ -597,22 +611,30 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "operation", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847990000 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" + "runId": 
"folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" } }, { @@ -629,7 +651,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:s3" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" } }, { @@ -644,12 +683,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { @@ -657,49 +697,52 @@ "customProperties": { "platform": "s3", "env": "UAT", - "folder_abs_path": "my-test-bucket/folder_a" + "folder_abs_path": "my-test-bucket/folder_a/folder_aa" }, - "name": "folder_a" + "name": "folder_aa" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:86297df39321e4948dbe8b8e941de98b" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": 
"folder_partition_update_schema.json" + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -711,77 +754,61 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "containerProperties", "aspect": { "json": { - "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a" + }, + "name": "folder_a" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", - "urn": 
"urn:li:container:050fedde7a12cb8c8447db8d298f5577" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", - "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "status", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "UAT", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa" - }, - "name": "folder_aa" + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" } }, { @@ -796,12 +823,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -813,22 +841,29 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": 
"urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + } + ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" } }, { @@ -852,7 +887,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" } }, { @@ -872,37 +908,56 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": 
"urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" } }, { @@ -919,22 +974,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" } }, { @@ -962,7 +1019,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" } }, { @@ -977,7 +1035,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" } }, { @@ -992,7 +1051,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1024,7 +1084,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema.json" + "runId": "folder_partition_update_schema.json", + "lastRunId": 
"no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_update_schema_with_partition_autodetect.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_update_schema_with_partition_autodetect.json new file mode 100644 index 00000000000000..4fee8fb36b13f0 --- /dev/null +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_update_schema_with_partition_autodetect.json @@ -0,0 +1,1483 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv/part3.csv" + }, + "name": "folder_aaa.food_csv", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "folder_aaa.food_csv", + "platform": "urn:li:dataPlatform:s3", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "color", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "height", + 
"nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "weight", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "s3", + "env": "UAT", + "bucket_name": "my-test-bucket" + }, + "name": "my-test-bucket" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + 
"operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847820000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "S3 bucket" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:s3" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a" + }, + "name": "folder_a" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + 
"entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:s3" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa" + }, + "name": "folder_aa" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" 
+ } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", 
+ "aspect": { + "json": { + "platform": "urn:li:dataPlatform:s3" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + }, + { + "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa/folder_aaa" + }, + "name": "folder_aaa" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "changeType": "UPSERT", + "aspectName": 
"status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:s3" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + }, + { + "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + }, + { + "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", 
+ "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet/part2.parquet" + }, + "name": "folder_aaa.food_parquet", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "folder_aaa.food_parquet", + "platform": "urn:li:dataPlatform:s3", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "color", + "nullable": 
false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "healthy", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BooleanType": {} + } + }, + "nativeDataType": "bool", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "height", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int64", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "weight", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int64", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + }, + { + "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + }, + { + "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + }, + { + "id": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": 
"folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847840000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json", + "number_of_partitions": "1" + }, + "name": "folder_aaa.pokemon_abilities_json", + "description": "", + "created": { + "time": 1586847980000 + }, + "lastModified": { + "time": 1586847980000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ 
+ "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "folder_aaa.pokemon_abilities_json", + "platform": "urn:li:dataPlatform:s3", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "effect_changes", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.effect", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language.is_native", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": 
"effect_changes.effect_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.version_group", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.version_group.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.version_group.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.effect", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.language", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.language.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": 
"effect_entries.short_effect", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.flavor_text", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.language", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.language.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.version_group", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.version_group.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.version_group.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + 
{ + "fieldPath": "generation", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "generation.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "generation.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "id", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "is_main_series", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BooleanType": {} + } + }, + "nativeDataType": "bool", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names.language", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names.language.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + 
"recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.is_hidden", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BooleanType": {} + } + }, + "nativeDataType": "bool", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.pokemon", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.pokemon.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.pokemon.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.slot", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, 
+ "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "partitionsSummary", + "aspect": { + "json": { + "minPartition": { + "partition": "year=2022/month=jan", + "createdTime": 1586847980000, + "lastModifiedTime": 1586847980000 + }, + "maxPartition": { + "partition": "year=2022/month=jan", + "createdTime": 1586847980000, + "lastModifiedTime": 1586847980000 + } + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + }, + { + "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + }, + { + "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + }, + { + "id": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": 
{ + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847990000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + }, + { + "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + }, + { + "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + }, + { + "id": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json new file mode 100644 index 00000000000000..13ba57f5be6717 --- /dev/null +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json @@ -0,0 +1,1483 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv/part3.csv" + }, + "name": "folder_aaa.food_csv", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "folder_aaa.food_csv", + "platform": "urn:li:dataPlatform:s3", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": 
"urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "color", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "height", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "weight", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "s3", + "env": "UAT", + "bucket_name": "my-test-bucket" + }, + "name": "my-test-bucket" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847820000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "S3 bucket" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:s3" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "changeType": "UPSERT", + "aspectName": "containerProperties", + 
"aspect": { + "json": { + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a" + }, + "name": "folder_a" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:s3" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa" + }, + "name": "folder_aa" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": 
"folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "changeType": "UPSERT", + 
"aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:s3" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + }, + { + "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa/folder_aaa" + }, + "name": "folder_aaa" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "changeType": "UPSERT", + "aspectName": 
"container", + "aspect": { + "json": { + "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:s3" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + }, + { + "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + }, + { + "id": 
"urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet/part2.parquet" + }, + "name": "folder_aaa.food_parquet", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + 
"changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "folder_aaa.food_parquet", + "platform": "urn:li:dataPlatform:s3", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "color", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "healthy", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BooleanType": {} + } + }, + "nativeDataType": "bool", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "height", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int64", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "weight", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int64", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": 
"urn:li:container:050fedde7a12cb8c8447db8d298f5577" + }, + { + "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + }, + { + "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + }, + { + "id": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847840000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + 
"schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json", + "number_of_partitions": "7" + }, + "name": "folder_aaa.pokemon_abilities_json", + "description": "", + "created": { + "time": 1586847860000 + }, + "lastModified": { + "time": 1586847980000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "folder_aaa.pokemon_abilities_json", + "platform": "urn:li:dataPlatform:s3", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "effect_changes", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.effect", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": 
false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language.is_native", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.version_group", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.version_group.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.version_group.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.effect", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.language", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": 
"dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.language.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.short_effect", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.flavor_text", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.language", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.language.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.version_group", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": 
"dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.version_group.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.version_group.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "generation", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "generation.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "generation.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "id", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "is_main_series", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BooleanType": {} + } + }, + "nativeDataType": "bool", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names.language", + "nullable": false, + 
"type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names.language.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.is_hidden", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BooleanType": {} + } + }, + "nativeDataType": "bool", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.pokemon", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.pokemon.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.pokemon.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.slot", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + 
"isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "partitionsSummary", + "aspect": { + "json": { + "minPartition": { + "partition": "year=2019/month=feb", + "createdTime": 1586847860000, + "lastModifiedTime": 1586847870000 + }, + "maxPartition": { + "partition": "year=2022/month=jan", + "createdTime": 1586847980000, + "lastModifiedTime": 1586847980000 + } + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + }, + { + "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + }, + { + "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + }, + { + "id": 
"urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847990000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + }, + { + "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + }, + { + "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + 
"urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + }, + { + "id": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json new file mode 100644 index 00000000000000..9f45583f1d3eca --- /dev/null +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json @@ -0,0 +1,1483 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv/part3.csv" + }, + "name": "folder_aaa.food_csv", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": 
"dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "folder_aaa.food_csv", + "platform": "urn:li:dataPlatform:s3", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "color", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "height", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "weight", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "s3", + "env": "UAT", + "bucket_name": "my-test-bucket" + }, + "name": "my-test-bucket" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": 
"folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847820000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "S3 bucket" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:s3" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": 
"urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a" + }, + "name": "folder_a" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:s3" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa" + }, + "name": "folder_aa" + } + }, + 
"systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": 
"urn:li:container:86297df39321e4948dbe8b8e941de98b", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:s3" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + }, + { + "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "changeType": "UPSERT", + "aspectName": 
"containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa/folder_aaa" + }, + "name": "folder_aaa" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:s3" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + }, + { + "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + }, + { + "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + 
"schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet/part2.parquet" + }, + "name": "folder_aaa.food_parquet", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "folder_aaa.food_parquet", + "platform": "urn:li:dataPlatform:s3", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "color", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "healthy", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BooleanType": {} + } + }, + "nativeDataType": "bool", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "height", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int64", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "weight", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int64", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": 
{ + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + }, + { + "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + }, + { + "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + }, + { + "id": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847840000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": 
"urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json", + "number_of_partitions": "2" + }, + "name": "folder_aaa.pokemon_abilities_json", + "description": "", + "created": { + "time": 1586847860000 + }, + "lastModified": { + "time": 1586847980000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "folder_aaa.pokemon_abilities_json", + "platform": "urn:li:dataPlatform:s3", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "effect_changes", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} 
+ } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.effect", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language.is_native", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.version_group", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.version_group.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.version_group.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries", + "nullable": false, + 
"type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.effect", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.language", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.language.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_entries.short_effect", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.flavor_text", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.language", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.language.name", + "nullable": false, + "type": { + "type": { + 
"com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.version_group", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.version_group.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "flavor_text_entries.version_group.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "generation", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "generation.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "generation.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "id", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "is_main_series", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BooleanType": {} + } + }, + 
"nativeDataType": "bool", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names.language", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names.language.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "names.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.is_hidden", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BooleanType": {} + } + }, + "nativeDataType": "bool", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.pokemon", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "dict", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.pokemon.name", + "nullable": false, + "type": 
{ + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.pokemon.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pokemon.slot", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "partitionsSummary", + "aspect": { + "json": { + "minPartition": { + "partition": "year=2019/month=feb", + "createdTime": 1586847860000, + "lastModifiedTime": 1586847870000 + }, + "maxPartition": { + "partition": "year=2022/month=jan", + "createdTime": 1586847980000, + "lastModifiedTime": 1586847980000 + } + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + }, + { + "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + }, + { + "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + }, + { + "id": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847990000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + 
"lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + }, + { + "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + }, + { + "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + }, + { + "id": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json index d7a9bca716fd60..d97cedd39d77c3 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json @@ -65,19 +65,19 @@ }, "fields": [ { - "fieldPath": "Sampling Date", + "fieldPath": "2", 
"nullable": false, "type": { "type": { - "com.linkedin.schema.DateType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "date", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "Site ID", + "fieldPath": "3", "nullable": false, "type": { "type": { @@ -89,7 +89,7 @@ "isPartOfKey": false }, { - "fieldPath": "Park ID", + "fieldPath": "Br \n(mg/L)", "nullable": false, "type": { "type": { @@ -101,7 +101,7 @@ "isPartOfKey": false }, { - "fieldPath": "Lat (\u00b0N)", + "fieldPath": "Ca \n(mg/L)", "nullable": false, "type": { "type": { @@ -113,7 +113,7 @@ "isPartOfKey": false }, { - "fieldPath": "Long (\u00b0W)", + "fieldPath": "Cl \n(mg/L)", "nullable": false, "type": { "type": { @@ -125,7 +125,7 @@ "isPartOfKey": false }, { - "fieldPath": "Water Temp (\u00b0C)", + "fieldPath": "Cond (\u00b5S/cm)", "nullable": false, "type": { "type": { @@ -137,31 +137,31 @@ "isPartOfKey": false }, { - "fieldPath": "Cond (\u00b5S/cm)", + "fieldPath": "DO (mg/L)", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "number", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "pH", + "fieldPath": "DOC [mg/L C]", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "number", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "DO (mg/L)", + "fieldPath": "F \n(mg/L)", "nullable": false, "type": { "type": { @@ -173,19 +173,19 @@ "isPartOfKey": false }, { - "fieldPath": "Secchi Depth (m)", + "fieldPath": "K \n(mg/L)", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "number", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "UV Absorbance, 254nm", + 
"fieldPath": "Lat (\u00b0N)", "nullable": false, "type": { "type": { @@ -197,7 +197,7 @@ "isPartOfKey": false }, { - "fieldPath": "DOC [mg/L C]", + "fieldPath": "Long (\u00b0W)", "nullable": false, "type": { "type": { @@ -209,7 +209,7 @@ "isPartOfKey": false }, { - "fieldPath": "SUVA, 254nm", + "fieldPath": "Mg \n(mg/L)", "nullable": false, "type": { "type": { @@ -245,31 +245,31 @@ "isPartOfKey": false }, { - "fieldPath": "PO4-P \n(mg P/L)", + "fieldPath": "Na \n(mg/L)", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "number", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "TDN \n(mg N/L)", + "fieldPath": "PO4-P \n(mg P/L)", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "number", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "TDP \n(mg P/L)", + "fieldPath": "Park ID", "nullable": false, "type": { "type": { @@ -281,7 +281,7 @@ "isPartOfKey": false }, { - "fieldPath": "Cl \n(mg/L)", + "fieldPath": "SO4-S \n(mg/L)", "nullable": false, "type": { "type": { @@ -293,7 +293,7 @@ "isPartOfKey": false }, { - "fieldPath": "SO4-S \n(mg/L)", + "fieldPath": "SUVA, 254nm", "nullable": false, "type": { "type": { @@ -305,19 +305,19 @@ "isPartOfKey": false }, { - "fieldPath": "F \n(mg/L)", + "fieldPath": "Sampling Date", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.DateType": {} } }, - "nativeDataType": "string", + "nativeDataType": "date", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "Br \n(mg/L)", + "fieldPath": "Secchi Depth (m)", "nullable": false, "type": { "type": { @@ -329,19 +329,19 @@ "isPartOfKey": false }, { - "fieldPath": "Na \n(mg/L)", + "fieldPath": "Site ID", "nullable": false, "type": { "type": { - 
"com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "number", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "K \n(mg/L)", + "fieldPath": "TDN \n(mg N/L)", "nullable": false, "type": { "type": { @@ -353,19 +353,19 @@ "isPartOfKey": false }, { - "fieldPath": "Ca \n(mg/L)", + "fieldPath": "TDP \n(mg P/L)", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "number", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "Mg \n(mg/L)", + "fieldPath": "UV Absorbance, 254nm", "nullable": false, "type": { "type": { @@ -377,19 +377,19 @@ "isPartOfKey": false }, { - "fieldPath": "d18O", + "fieldPath": "Water Temp (\u00b0C)", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "number", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "dD", + "fieldPath": "d18O", "nullable": false, "type": { "type": { @@ -401,7 +401,7 @@ "isPartOfKey": false }, { - "fieldPath": "field29", + "fieldPath": "dD", "nullable": false, "type": { "type": { @@ -413,7 +413,7 @@ "isPartOfKey": false }, { - "fieldPath": "2", + "fieldPath": "field29", "nullable": false, "type": { "type": { @@ -425,7 +425,7 @@ "isPartOfKey": false }, { - "fieldPath": "3", + "fieldPath": "pH", "nullable": false, "type": { "type": { @@ -447,18 +447,18 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "changeType": "UPSERT", "aspectName": "operation", "aspect": { 
"json": { "timestampMillis": 1615443388097, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847610000 + "lastUpdatedTimestamp": 1586847660000 } }, "systemMetadata": { @@ -468,19 +468,13 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "container", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "instance": "test-platform-instance", - "env": "DEV", - "bucket_name": "my-test-bucket" - }, - "name": "my-test-bucket" + "container": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" } }, "systemMetadata": { @@ -490,8 +484,8 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -506,14 +500,19 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "operation", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + "timestampMillis": 1615443388097, + "partitionSpec": { + 
"partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847610000 } }, "systemMetadata": { @@ -523,15 +522,14 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "typeNames": [ - "S3 bucket" - ] + "platform": "urn:li:dataPlatform:s3", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" } }, "systemMetadata": { @@ -544,27 +542,6 @@ "entityType": "container", "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", - "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { "json": { @@ -572,9 +549,9 @@ "platform": "s3", "instance": "test-platform-instance", "env": "DEV", - "folder_abs_path": "my-test-bucket/folder_a" + "bucket_name": "my-test-bucket" }, - "name": "folder_a" + "name": "my-test-bucket" } }, "systemMetadata": { @@ -585,7 +562,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", + "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -601,7 +578,7 @@ }, { 
"entityType": "container", - "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", + "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -617,31 +594,20 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "datasetProperties", "aspect": { "json": { - "container": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" + "customProperties": { + "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv", + "number_of_files": "1", + "size_in_bytes": "172" + }, + "name": "small.csv", + "description": "", + "tags": [] } }, "systemMetadata": { @@ -652,7 +618,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", + "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -661,10 +627,6 @@ { "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" - }, - { - "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", - "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" } ] } @@ -677,18 +639,14 @@ }, { "entityType": "container", - "entityUrn": 
"urn:li:container:b0037296cdd497e3137aa0628b8687bc", + "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "subTypes", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "instance": "test-platform-instance", - "env": "DEV", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa" - }, - "name": "folder_aa" + "typeNames": [ + "S3 bucket" + ] } }, "systemMetadata": { @@ -698,13 +656,34 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePathsV2", "aspect": { "json": { - "removed": false + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + }, + { + "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", + "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" + }, + { + "id": "urn:li:container:c8d940d2010edd365619411b385b11e4", + "urn": "urn:li:container:c8d940d2010edd365619411b385b11e4" + }, + { + "id": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", + "urn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc" + }, + { + "id": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", + "urn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" + } + ] } }, "systemMetadata": { @@ -714,14 +693,91 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", 
+ "aspectName": "schemaMetadata", "aspect": { "json": { + "schemaName": "small.csv", "platform": "urn:li:dataPlatform:s3", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "1st chord", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "2nd chord", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "3rd chord", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "4th chord", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "Progression Quality", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + } + ] } }, "systemMetadata": { @@ -732,14 +788,18 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", + "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "containerProperties", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "customProperties": { + "platform": "s3", + "instance": "test-platform-instance", + "env": "DEV", + 
"folder_abs_path": "my-test-bucket/folder_a" + }, + "name": "folder_a" } }, "systemMetadata": { @@ -750,12 +810,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", + "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:c8d940d2010edd365619411b385b11e4" + "container": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" } }, "systemMetadata": { @@ -766,25 +826,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", + "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" - }, - { - "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", - "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" - }, - { - "id": "urn:li:container:c8d940d2010edd365619411b385b11e4", - "urn": "urn:li:container:c8d940d2010edd365619411b385b11e4" - } - ] + "removed": false } }, "systemMetadata": { @@ -795,7 +842,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", + "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { @@ -804,9 +851,9 @@ "platform": "s3", "instance": "test-platform-instance", "env": "DEV", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa/folder_aaa" + "folder_abs_path": "my-test-bucket/folder_a/folder_aa" }, - "name": "folder_aaa" + "name": "folder_aa" } }, "systemMetadata": { @@ -817,7 +864,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", + 
"entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -833,7 +880,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", + "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -850,7 +897,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", + "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -868,12 +915,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", + "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "subTypes", "aspect": { "json": { - "container": "urn:li:container:b0037296cdd497e3137aa0628b8687bc" + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { @@ -884,7 +933,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", + "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -901,10 +950,48 @@ { "id": "urn:li:container:c8d940d2010edd365619411b385b11e4", "urn": "urn:li:container:c8d940d2010edd365619411b385b11e4" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:s3", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + 
"runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" }, { - "id": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", - "urn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc" + "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", + "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" } ] } @@ -916,13 +1003,35 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" + "container": "urn:li:container:c8d940d2010edd365619411b385b11e4" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847620000 } }, "systemMetadata": { @@ -972,17 +1081,42 @@ "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "container", "aspect": { "json": { - "customProperties": { - "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro", - "number_of_files": "1", - "size_in_bytes": "619" - }, - "name": "chord_progressions_avro.avro", - "description": "", - "tags": [] + "container": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false } }, "systemMetadata": { @@ -1012,10 +1146,73 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", "changeType": "UPSERT", - "aspectName": "schemaMetadata", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "schema_inferred_from": 
"s3://my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro", + "number_of_files": "1", + "size_in_bytes": "619" + }, + "name": "chord_progressions_avro.avro", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "schemaName": "chord_progressions_avro.avro", + "platform": "urn:li:dataPlatform:s3", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv", + "number_of_files": "1", + "size_in_bytes": "604" + }, + "name": "chord_progressions_csv.csv", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "chord_progressions_csv.csv", "platform": "urn:li:dataPlatform:s3", "version": 
0, "created": { @@ -1034,62 +1231,62 @@ }, "fields": [ { - "fieldPath": "[version=2.0].[type=Record].[type=long].FirstChord", + "fieldPath": "1st chord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "FirstChord", + "nativeDataType": "integer", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", + "fieldPath": "2nd chord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "FourthChord", + "nativeDataType": "integer", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", + "fieldPath": "3rd chord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "SecondChord", + "nativeDataType": "integer", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", + "fieldPath": "4th chord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "ThirdChord", + "nativeDataType": "integer", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=string].ProgressionQuality", + "fieldPath": "Progression Quality", "nullable": false, "type": { "type": { "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "ProgressionQuality", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false } @@ -1102,44 +1299,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, - 
"operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847620000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", @@ -1179,52 +1338,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:s3", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv", - "number_of_files": "1", - "size_in_bytes": "604" - }, - "name": "chord_progressions_csv.csv", - "description": "", - "tags": [] - } - }, - 
"systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", "changeType": "UPSERT", "aspectName": "schemaMetadata", "aspect": { "json": { - "schemaName": "chord_progressions_csv.csv", + "schemaName": "chord_progressions_avro.avro", "platform": "urn:li:dataPlatform:s3", "version": 0, "created": { @@ -1243,62 +1362,62 @@ }, "fields": [ { - "fieldPath": "1st chord", + "fieldPath": "[version=2.0].[type=Record].[type=long].FirstChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "integer", + "nativeDataType": "FirstChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "2nd chord", + "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "integer", + "nativeDataType": "FourthChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "3rd chord", + "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "integer", + "nativeDataType": "SecondChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "4th chord", + "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "integer", + "nativeDataType": "ThirdChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "Progression Quality", + "fieldPath": 
"[version=2.0].[type=Record].[type=string].ProgressionQuality", "nullable": false, "type": { "type": { "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "string", + "nativeDataType": "ProgressionQuality", "recursive": false, "isPartOfKey": false } @@ -1312,35 +1431,19 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "containerProperties", "aspect": { "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "customProperties": { + "platform": "s3", + "instance": "test-platform-instance", + "env": "DEV", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa/folder_aaa" }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847630000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" + "name": "folder_aaa" } }, "systemMetadata": { @@ -1351,33 +1454,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + 
"aspectName": "dataPlatformInstance", "aspect": { "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" - }, - { - "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", - "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" - }, - { - "id": "urn:li:container:c8d940d2010edd365619411b385b11e4", - "urn": "urn:li:container:c8d940d2010edd365619411b385b11e4" - }, - { - "id": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", - "urn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc" - }, - { - "id": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", - "urn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" - } - ] + "platform": "urn:li:dataPlatform:s3", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" } }, "systemMetadata": { @@ -1409,23 +1492,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:s3", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", @@ -1498,18 +1564,55 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + }, + { + "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", + "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" + }, + { + "id": "urn:li:container:c8d940d2010edd365619411b385b11e4", + "urn": "urn:li:container:c8d940d2010edd365619411b385b11e4" + }, + { + "id": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", + "urn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc" + }, + { + "id": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", + "urn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", "changeType": "UPSERT", "aspectName": "operation", "aspect": { "json": { "timestampMillis": 1615443388097, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847640000 + "lastUpdatedTimestamp": 1586847630000 } }, "systemMetadata": { @@ -1520,7 +1623,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -1573,19 +1676,18 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "operation", "aspect": { "json": { - "customProperties": { - "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet", - "number_of_files": "1", - "size_in_bytes": "4206" + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, - "name": "food_parquet.parquet", - "description": "", - "tags": [] + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847640000 } }, "systemMetadata": { @@ -1596,7 +1698,55 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": 
"urn:li:container:b0037296cdd497e3137aa0628b8687bc" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -1612,89 +1762,46 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", "changeType": "UPSERT", - "aspectName": "schemaMetadata", + "aspectName": "subTypes", "aspect": { "json": { - "schemaName": "food_parquet.parquet", - "platform": "urn:li:dataPlatform:s3", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "color", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", + "changeType": "UPSERT", + "aspectName": 
"browsePathsV2", + "aspect": { + "json": { + "path": [ { - "fieldPath": "healthy", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "bool", - "recursive": false, - "isPartOfKey": false + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" }, { - "fieldPath": "height", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int64", - "recursive": false, - "isPartOfKey": false + "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", + "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false + { + "id": "urn:li:container:c8d940d2010edd365619411b385b11e4", + "urn": "urn:li:container:c8d940d2010edd365619411b385b11e4" }, { - "fieldPath": "weight", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int64", - "recursive": false, - "isPartOfKey": false + "id": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", + "urn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc" } ] } @@ -1707,18 +1814,18 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", "changeType": "UPSERT", "aspectName": "operation", "aspect": { "json": { "timestampMillis": 1615443388097, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + 
"type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847650000 + "lastUpdatedTimestamp": 1586847670000 } }, "systemMetadata": { @@ -1729,7 +1836,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -1745,33 +1852,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" - }, - { - "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", - "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" - }, - { - "id": "urn:li:container:c8d940d2010edd365619411b385b11e4", - "urn": "urn:li:container:c8d940d2010edd365619411b385b11e4" - }, - { - "id": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", - "urn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc" - }, - { - "id": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", - "urn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" - } - ] + "removed": false } }, "systemMetadata": { @@ -1782,7 +1868,7 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -1799,17 +1885,34 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:s3", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { "json": { "customProperties": { - "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv", + "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet", "number_of_files": "1", - "size_in_bytes": "172" + "size_in_bytes": "4206" }, - "name": "small.csv", + "name": "food_parquet.parquet", "description": "", "tags": [] } @@ -1822,12 +1925,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", "changeType": "UPSERT", "aspectName": "schemaMetadata", "aspect": { "json": { - "schemaName": "small.csv", + "schemaName": "food_parquet.parquet", "platform": "urn:li:dataPlatform:s3", "version": 0, "created": { @@ -1846,62 +1949,62 @@ }, "fields": [ { - "fieldPath": "1st chord", + "fieldPath": "color", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "integer", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "2nd chord", + "fieldPath": "healthy", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.BooleanType": {} } }, - "nativeDataType": "integer", + "nativeDataType": "bool", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "3rd chord", + "fieldPath": "height", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "integer", + "nativeDataType": "int64", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "4th chord", + "fieldPath": "name", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "integer", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "Progression Quality", + "fieldPath": "weight", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "int64", "recursive": false, "isPartOfKey": false } @@ -1916,34 +2019,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "datasetProperties", "aspect": { "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "customProperties": { + "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv", + "number_of_files": "1", + "size_in_bytes": "34056" }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847660000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" + "name": "wa_fn_usec_hr_employee_attrition_csv.csv", + "description": "", + "tags": [] } }, "systemMetadata": { @@ -1954,7 +2042,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1989,29 +2077,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - 
"json": { - "customProperties": { - "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv", - "number_of_files": "1", - "size_in_bytes": "34056" - }, - "name": "wa_fn_usec_hr_employee_attrition_csv.csv", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", @@ -2420,62 +2485,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847670000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:s3", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -2512,28 +2522,18 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "operation", "aspect": { "json": { - "removed": false + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 
1586847650000 } }, "systemMetadata": { @@ -2544,12 +2544,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" } }, "systemMetadata": { @@ -2560,7 +2560,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2592,7 +2592,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2608,7 +2608,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { diff --git 
a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json index 9c41bbdc80c496..5d45a2a5f696e1 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json @@ -18,7 +18,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -111,7 +112,45 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "s3", + "env": "PROD", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa" + }, + "name": "folder_aa" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -133,7 +172,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -145,8 +185,8 @@ "json": { "timestampMillis": 
1615443388097, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "operationType": "UPDATE", "lastUpdatedTimestamp": 1586847620000 @@ -154,32 +194,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "s3", - "env": "PROD", - "bucket_name": "my-test-bucket" - }, - "name": "my-test-bucket" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", + "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -189,54 +210,47 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:s3" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", + "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "S3 bucket" + "Folder" ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": 
"multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "containerProperties", "aspect": { "json": { - "path": [] + "customProperties": { + "platform": "s3", + "env": "PROD", + "bucket_name": "my-test-bucket" + }, + "name": "my-test-bucket" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -256,7 +270,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -271,7 +286,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -286,39 +302,42 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", + "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "subTypes", "aspect": { "json": { - "container": 
"urn:li:container:2151647ff17bde0f948909d19fa91b9b" + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -338,89 +357,90 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "PROD", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa" - }, - "name": "folder_aa" + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "entityUrn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "container", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "container": "urn:li:container:2151647ff17bde0f948909d19fa91b9b" } }, "systemMetadata": { "lastObserved": 
1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "path": [] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "subTypes", "aspect": { "json": { - "container": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c" + "typeNames": [ + "S3 bucket" + ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -444,7 +464,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -464,37 +485,40 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": 
"multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -511,22 +535,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "container": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15" + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -554,7 +580,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -569,39 +596,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", - "urn": 
"urn:li:container:2151647ff17bde0f948909d19fa91b9b" - }, - { - "id": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", - "urn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c" - }, - { - "id": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", - "urn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15" - }, - { - "id": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", - "urn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0" - } - ] + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -623,7 +635,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -716,65 +729,102 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", "changeType": "UPSERT", - "aspectName": "globalTags", + "aspectName": "browsePathsV2", "aspect": { "json": { - "tags": [ + "path": [ { - "tag": "urn:li:tag:baz:bob" + "id": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", + "urn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b" }, { - "tag": "urn:li:tag:foo:bar" + "id": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", + "urn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c" + }, + { + "id": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "urn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15" + }, + { + "id": 
"urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", + "urn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0" } ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:baz:bob", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "baz:bob" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "globalTags", "aspect": { "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847630000 + "tags": [ + { + "tag": "urn:li:tag:baz:bob" + }, + { + "tag": "urn:li:tag:foo:bar" + } + ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "operation", "aspect": { "json": { - "container": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0" + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847630000 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": 
"multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -806,22 +856,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -836,22 +888,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" - } -}, -{ - "entityType": "tag", - "entityUrn": "urn:li:tag:baz:bob", - "changeType": "UPSERT", - "aspectName": "tagKey", - "aspect": { - "json": { - "name": "baz:bob" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -866,7 +904,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json" + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_specs_of_different_buckets.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_specs_of_different_buckets.json index 985140f774ab47..90c3e0b3850b79 100644 --- 
a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_specs_of_different_buckets.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_specs_of_different_buckets.json @@ -18,7 +18,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -111,7 +112,45 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "s3", + "env": "PROD", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa" + }, + "name": "folder_aa" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -133,7 +172,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -145,8 +185,8 @@ "json": { "timestampMillis": 1615443388097, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + 
"type": "FULL_TABLE" }, "operationType": "UPDATE", "lastUpdatedTimestamp": 1586847620000 @@ -154,32 +194,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "s3", - "env": "PROD", - "bucket_name": "my-test-bucket" - }, - "name": "my-test-bucket" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", + "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -189,54 +210,47 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:s3" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", + "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "S3 bucket" + "Folder" ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": 
"multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "containerProperties", "aspect": { "json": { - "path": [] + "customProperties": { + "platform": "s3", + "env": "PROD", + "bucket_name": "my-test-bucket" + }, + "name": "my-test-bucket" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -256,7 +270,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -271,7 +286,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -286,39 +302,42 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", + "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", "changeType": "UPSERT", - "aspectName": "container", + 
"aspectName": "subTypes", "aspect": { "json": { - "container": "urn:li:container:2151647ff17bde0f948909d19fa91b9b" + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -338,89 +357,90 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "PROD", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa" - }, - "name": "folder_aa" + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "entityUrn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "container", "aspect": 
{ "json": { - "platform": "urn:li:dataPlatform:s3" + "container": "urn:li:container:2151647ff17bde0f948909d19fa91b9b" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "path": [] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "subTypes", "aspect": { "json": { - "container": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c" + "typeNames": [ + "S3 bucket" + ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -444,7 +464,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -464,37 +485,40 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", "changeType": "UPSERT", - 
"aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -511,22 +535,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "container": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15" + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -554,7 +580,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -569,39 +596,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": 
"multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", - "urn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b" - }, - { - "id": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", - "urn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c" - }, - { - "id": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", - "urn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15" - }, - { - "id": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", - "urn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0" - } - ] + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -623,7 +635,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -716,100 +729,123 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket-2/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", "changeType": "UPSERT", - "aspectName": "globalTags", + "aspectName": "browsePathsV2", "aspect": { "json": { - "tags": [ + 
"path": [ { - "tag": "urn:li:tag:baz:bob" + "id": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", + "urn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b" }, { - "tag": "urn:li:tag:foo:bar" + "id": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", + "urn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c" + }, + { + "id": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "urn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15" + }, + { + "id": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", + "urn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0" } ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket-2/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:680e54d5e3a7705caa1d99893fab4924", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "containerProperties", "aspect": { "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "customProperties": { + "platform": "s3", + "env": "PROD", + "bucket_name": "my-test-bucket-2" }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847630000 + "name": "my-test-bucket-2" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:680e54d5e3a7705caa1d99893fab4924", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket-2/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", "changeType": "UPSERT", - "aspectName": 
"containerProperties", + "aspectName": "globalTags", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "PROD", - "bucket_name": "my-test-bucket-2" - }, - "name": "my-test-bucket-2" + "tags": [ + { + "tag": "urn:li:tag:baz:bob" + }, + { + "tag": "urn:li:tag:foo:bar" + } + ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:680e54d5e3a7705caa1d99893fab4924", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket-2/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "operation", "aspect": { "json": { - "removed": false + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847630000 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:680e54d5e3a7705caa1d99893fab4924", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -826,22 +862,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { 
"entityType": "container", "entityUrn": "urn:li:container:680e54d5e3a7705caa1d99893fab4924", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "path": [] + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -861,7 +899,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -876,7 +915,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -891,39 +931,42 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:f6d8484efac8152d10620c6c0699d02d", + "entityUrn": "urn:li:container:680e54d5e3a7705caa1d99893fab4924", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "path": [] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:f6d8484efac8152d10620c6c0699d02d", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "subTypes", "aspect": { "json": { - "container": "urn:li:container:680e54d5e3a7705caa1d99893fab4924" + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { 
"lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -943,7 +986,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -963,7 +1007,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -978,7 +1023,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -993,39 +1039,42 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:6e8c28494477b4a90cf5fd395217bae0", + "entityType": "tag", + "entityUrn": "urn:li:tag:baz:bob", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "tagKey", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "name": "baz:bob" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:6e8c28494477b4a90cf5fd395217bae0", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "subTypes", "aspect": { "json": { - "container": "urn:li:container:f6d8484efac8152d10620c6c0699d02d" + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": 
"multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1049,7 +1098,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1069,37 +1119,72 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f6d8484efac8152d10620c6c0699d02d", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:680e54d5e3a7705caa1d99893fab4924" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:6e8c28494477b4a90cf5fd395217bae0", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f6d8484efac8152d10620c6c0699d02d" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:ce2eca2107ef4c0b47a8f4a65eff971c", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:6e8c28494477b4a90cf5fd395217bae0" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": 
"urn:li:container:ce2eca2107ef4c0b47a8f4a65eff971c", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1116,22 +1201,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:ce2eca2107ef4c0b47a8f4a65eff971c", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "container": "urn:li:container:6e8c28494477b4a90cf5fd395217bae0" + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1159,7 +1246,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1174,7 +1262,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1189,7 +1278,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1221,37 +1311,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" - 
} -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" - } -}, -{ - "entityType": "tag", - "entityUrn": "urn:li:tag:baz:bob", - "changeType": "UPSERT", - "aspectName": "tagKey", - "aspect": { - "json": { - "name": "baz:bob" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1266,7 +1327,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json" + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json index 5d87d423a6a674..5fb691c1ee4579 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json @@ -18,7 +18,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { @@ -111,7 +112,45 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + 
"customProperties": { + "platform": "s3", + "env": "PROD", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa" + }, + "name": "folder_aa" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { @@ -133,7 +172,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { @@ -145,8 +185,8 @@ "json": { "timestampMillis": 1615443388097, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "operationType": "UPDATE", "lastUpdatedTimestamp": 1586847620000 @@ -154,32 +194,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_file.json" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "s3", - "env": "PROD", - "bucket_name": "my-test-bucket" - }, - "name": "my-test-bucket" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", + "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -189,54 +210,47 @@ }, "systemMetadata": { 
"lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:s3" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "single_file.json" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", + "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "S3 bucket" + "Folder" ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "containerProperties", "aspect": { "json": { - "path": [] + "customProperties": { + "platform": "s3", + "env": "PROD", + "bucket_name": "my-test-bucket" + }, + "name": "my-test-bucket" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { @@ -256,7 +270,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { @@ -271,7 +286,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { @@ -286,39 +302,42 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": 
"urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", + "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "subTypes", "aspect": { "json": { - "container": "urn:li:container:2151647ff17bde0f948909d19fa91b9b" + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { @@ -338,89 +357,90 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "PROD", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa" - }, - "name": "folder_aa" + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { 
"lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "entityUrn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "container", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "container": "urn:li:container:2151647ff17bde0f948909d19fa91b9b" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "path": [] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "subTypes", "aspect": { "json": { - "container": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c" + "typeNames": [ + "S3 bucket" + ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { @@ -444,7 +464,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { @@ -464,37 +485,40 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + 
"lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { @@ -511,22 +535,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", "entityUrn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "container": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15" + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { @@ -554,7 +580,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { @@ -569,7 +596,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { @@ -584,7 +612,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": 
"single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { @@ -616,7 +645,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { @@ -631,7 +661,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } }, { @@ -646,7 +677,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_file.json" + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/sources/s3/folder_no_partition.json b/metadata-ingestion/tests/integration/s3/sources/s3/folder_no_partition.json index c06e411005399e..db3eaadf300407 100644 --- a/metadata-ingestion/tests/integration/s3/sources/s3/folder_no_partition.json +++ b/metadata-ingestion/tests/integration/s3/sources/s3/folder_no_partition.json @@ -2,6 +2,7 @@ "type": "s3", "config": { "env": "UAT", + "sort_schema_fields": true, "path_specs": [{ "include": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/{table}/*.*" }], diff --git a/metadata-ingestion/tests/integration/s3/sources/s3/folder_partition_update_schema_with_partition_autodetect.json b/metadata-ingestion/tests/integration/s3/sources/s3/folder_partition_update_schema_with_partition_autodetect.json new file mode 100644 index 00000000000000..2ed1a4ae9ccd31 --- /dev/null +++ b/metadata-ingestion/tests/integration/s3/sources/s3/folder_partition_update_schema_with_partition_autodetect.json @@ -0,0 +1,23 @@ +{ + "type": "s3", + "config": { + "env": "UAT", + "path_specs": [{ + "include": "s3://my-test-bucket/folder_a/folder_aa/{dept}/{table}/", + "sample_files": true, + "autodetect_partitions": true, + "table_name": "{dept}.{table}", + "exclude":[ + "**/folder_aaaa/**" + ] + }], + "aws_config": { + "aws_region": "us-east-1", + 
"aws_access_key_id": "testing", + "aws_secret_access_key": "testing" + }, + "profiling": { + "enabled": false + } + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/sources/s3/folder_partition_with_partition_autodetect_traverse_all.json b/metadata-ingestion/tests/integration/s3/sources/s3/folder_partition_with_partition_autodetect_traverse_all.json new file mode 100644 index 00000000000000..b6aa75822612a7 --- /dev/null +++ b/metadata-ingestion/tests/integration/s3/sources/s3/folder_partition_with_partition_autodetect_traverse_all.json @@ -0,0 +1,24 @@ +{ + "type": "s3", + "config": { + "env": "UAT", + "path_specs": [{ + "include": "s3://my-test-bucket/folder_a/folder_aa/{dept}/{table}/", + "sample_files": true, + "autodetect_partitions": true, + "traversal_method": "ALL", + "table_name": "{dept}.{table}", + "exclude":[ + "**/folder_aaaa/**" + ] + }], + "aws_config": { + "aws_region": "us-east-1", + "aws_access_key_id": "testing", + "aws_secret_access_key": "testing" + }, + "profiling": { + "enabled": false + } + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/sources/s3/folder_partition_with_partition_autodetect_traverse_min_max.json b/metadata-ingestion/tests/integration/s3/sources/s3/folder_partition_with_partition_autodetect_traverse_min_max.json new file mode 100644 index 00000000000000..ecf4fab1f4b073 --- /dev/null +++ b/metadata-ingestion/tests/integration/s3/sources/s3/folder_partition_with_partition_autodetect_traverse_min_max.json @@ -0,0 +1,24 @@ +{ + "type": "s3", + "config": { + "env": "UAT", + "path_specs": [{ + "include": "s3://my-test-bucket/folder_a/folder_aa/{dept}/{table}/", + "sample_files": true, + "autodetect_partitions": true, + "traversal_method": "MIN_MAX", + "table_name": "{dept}.{table}", + "exclude":[ + "**/folder_aaaa/**" + ] + }], + "aws_config": { + "aws_region": "us-east-1", + "aws_access_key_id": "testing", + "aws_secret_access_key": "testing" + }, + 
"profiling": { + "enabled": false + } + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv/part3.csv b/metadata-ingestion/tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv/part3.csv index e69de29bb2d1d6..c757de4cc1d772 100644 --- a/metadata-ingestion/tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv/part3.csv +++ b/metadata-ingestion/tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv/part3.csv @@ -0,0 +1,5 @@ +name,weight,height,color +apple,50,4,red +cookie,23,4,brown +lasagna,49,4,red +pasta,72,4,yellow diff --git a/metadata-ingestion/tests/integration/s3/test_s3.py b/metadata-ingestion/tests/integration/s3/test_s3.py index d255463444b18a..4137c6c5c399ea 100644 --- a/metadata-ingestion/tests/integration/s3/test_s3.py +++ b/metadata-ingestion/tests/integration/s3/test_s3.py @@ -15,6 +15,48 @@ FROZEN_TIME = "2020-04-14 07:00:00" +FILE_LIST_FOR_VALIDATION = [ + "folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv", + "folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro", + "folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv", + "folder_a/folder_aa/folder_aaa/countries_json.json", + "folder_a/folder_aa/folder_aaa/food_parquet.parquet", + "folder_a/folder_aa/folder_aaa/small.csv", + "folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv", + "folder_a/folder_aa/folder_aaa/folder_aaaa/pokemon_abilities_yearwise_2019/month=feb/part1.json", + "folder_a/folder_aa/folder_aaa/folder_aaaa/pokemon_abilities_yearwise_2019/month=feb/part2.json", + "folder_a/folder_aa/folder_aaa/folder_aaaa/pokemon_abilities_yearwise_2019/month=jan/part1.json", + "folder_a/folder_aa/folder_aaa/folder_aaaa/pokemon_abilities_yearwise_2019/month=jan/part2.json", + 
"folder_a/folder_aa/folder_aaa/folder_aaaa/pokemon_abilities_yearwise_2020/month=feb/part1.json", + "folder_a/folder_aa/folder_aaa/folder_aaaa/pokemon_abilities_yearwise_2020/month=feb/part2.json", + "folder_a/folder_aa/folder_aaa/folder_aaaa/pokemon_abilities_yearwise_2020/month=march/part1.json", + "folder_a/folder_aa/folder_aaa/folder_aaaa/pokemon_abilities_yearwise_2020/month=march/part2.json", + "folder_a/folder_aa/folder_aaa/folder_aaaa/pokemon_abilities_yearwise_2021/month=april/part1.json", + "folder_a/folder_aa/folder_aaa/folder_aaaa/pokemon_abilities_yearwise_2021/month=april/part2.json", + "folder_a/folder_aa/folder_aaa/folder_aaaa/pokemon_abilities_yearwise_2021/month=march/part1.json", + "folder_a/folder_aa/folder_aaa/folder_aaaa/pokemon_abilities_yearwise_2021/month=march/part2.json", + "folder_a/folder_aa/folder_aaa/food_csv/part1.csv", + "folder_a/folder_aa/folder_aaa/food_csv/part2.csv", + "folder_a/folder_aa/folder_aaa/food_csv/part3.csv", + "folder_a/folder_aa/folder_aaa/food_parquet/part1.parquet", + "folder_a/folder_aa/folder_aaa/food_parquet/part2.parquet", + "folder_a/folder_aa/folder_aaa/no_extension/small", + "folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2019/month=feb/part1.json", + "folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2019/month=feb/part2.json", + "folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2019/month=jan/part1.json", + "folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2019/month=jan/part2.json", + "folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2020/month=feb/part1.json", + "folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2020/month=feb/part2.json", + "folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2020/month=march/part1.json", + "folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2020/month=march/part2.json", + "folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2021/month=april/part1.json", + 
"folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2021/month=april/part2.json", + "folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2021/month=march/part1.json", + "folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2021/month=march/part2.json", + "folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json", + "folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/_temporary/dummy.json", +] + @pytest.fixture(scope="module", autouse=True) def bucket_names(): @@ -60,10 +102,13 @@ def s3_populate(pytestconfig, s3_resource, s3_client, bucket_names): current_time_sec = datetime.strptime( FROZEN_TIME, "%Y-%m-%d %H:%M:%S" ).timestamp() + file_list = [] for root, _dirs, files in os.walk(test_resources_dir): + _dirs.sort() for file in sorted(files): full_path = os.path.join(root, file) rel_path = os.path.relpath(full_path, test_resources_dir) + file_list.append(rel_path) bkt.upload_file(full_path, rel_path) s3_client.put_object_tagging( Bucket=bucket_name, @@ -77,6 +122,9 @@ def s3_populate(pytestconfig, s3_resource, s3_client, bucket_names): ) current_time_sec += 10 key.last_modified = datetime.fromtimestamp(current_time_sec) + + # This is used to make sure the list of files are the same in the test as locally + assert file_list == FILE_LIST_FOR_VALIDATION yield diff --git a/metadata-ingestion/tests/integration/salesforce/test_salesforce.py b/metadata-ingestion/tests/integration/salesforce/test_salesforce.py index 8b6b883b2148d2..89a37a372df843 100644 --- a/metadata-ingestion/tests/integration/salesforce/test_salesforce.py +++ b/metadata-ingestion/tests/integration/salesforce/test_salesforce.py @@ -1,10 +1,12 @@ import json import pathlib from unittest import mock +from unittest.mock import Mock from freezegun import freeze_time from datahub.ingestion.run.pipeline import Pipeline +from datahub.ingestion.source.salesforce import SalesforceConfig, SalesforceSource from tests.test_helpers import mce_helpers 
FROZEN_TIME = "2022-05-12 11:00:00" @@ -19,15 +21,16 @@ def _read_response(file_name: str) -> dict: return data -def side_effect_call_salesforce(type, url): - class MockResponse: - def __init__(self, json_data, status_code): - self.json_data = json_data - self.status_code = status_code +class MockResponse: + def __init__(self, json_data, status_code): + self.json_data = json_data + self.status_code = status_code + + def json(self): + return self.json_data - def json(self): - return self.json_data +def side_effect_call_salesforce(type, url): if url.endswith("/services/data/"): return MockResponse(_read_response("versions_response.json"), 200) if url.endswith("FROM EntityDefinition WHERE IsCustomizable = true"): @@ -55,9 +58,92 @@ def json(self): return MockResponse({}, 404) +@mock.patch("datahub.ingestion.source.salesforce.Salesforce") +def test_latest_version(mock_sdk): + mock_sf = mock.Mock() + mocked_call = mock.Mock() + mocked_call.side_effect = side_effect_call_salesforce + mock_sf._call_salesforce = mocked_call + mock_sdk.return_value = mock_sf + + config = SalesforceConfig.parse_obj( + { + "auth": "DIRECT_ACCESS_TOKEN", + "instance_url": "https://mydomain.my.salesforce.com/", + "access_token": "access_token`", + "ingest_tags": True, + "object_pattern": { + "allow": [ + "^Account$", + "^Property__c$", + ], + }, + "domain": {"sales": {"allow": {"^Property__c$"}}}, + "profiling": {"enabled": True}, + "profile_pattern": { + "allow": [ + "^Property__c$", + ] + }, + } + ) + SalesforceSource(config=config, ctx=Mock()) + calls = mock_sf._call_salesforce.mock_calls + assert ( + len(calls) == 1 + ), "We didn't specify version but source didn't call SF API to get the latest one" + assert calls[0].ends_with( + "/services/data" + ), "Source didn't call proper SF API endpoint to get all versions" + assert ( + mock_sf.sf_version == "54.0" + ), "API version was not correctly set (see versions_responses.json)" + + +@mock.patch("datahub.ingestion.source.salesforce.Salesforce") 
+def test_custom_version(mock_sdk): + mock_sf = mock.Mock() + mocked_call = mock.Mock() + mocked_call.side_effect = side_effect_call_salesforce + mock_sf._call_salesforce = mocked_call + mock_sdk.return_value = mock_sf + + config = SalesforceConfig.parse_obj( + { + "auth": "DIRECT_ACCESS_TOKEN", + "api_version": "46.0", + "instance_url": "https://mydomain.my.salesforce.com/", + "access_token": "access_token`", + "ingest_tags": True, + "object_pattern": { + "allow": [ + "^Account$", + "^Property__c$", + ], + }, + "domain": {"sales": {"allow": {"^Property__c$"}}}, + "profiling": {"enabled": True}, + "profile_pattern": { + "allow": [ + "^Property__c$", + ] + }, + } + ) + SalesforceSource(config=config, ctx=Mock()) + + calls = mock_sf._call_salesforce.mock_calls + assert ( + len(calls) == 0 + ), "Source called API to get all versions even though we specified proper version" + assert ( + mock_sdk.call_args.kwargs["version"] == "46.0" + ), "API client object was not correctly initialized with the custom version" + + @freeze_time(FROZEN_TIME) def test_salesforce_ingest(pytestconfig, tmp_path): - with mock.patch("simple_salesforce.Salesforce") as mock_sdk: + with mock.patch("datahub.ingestion.source.salesforce.Salesforce") as mock_sdk: mock_sf = mock.Mock() mocked_call = mock.Mock() mocked_call.side_effect = side_effect_call_salesforce diff --git a/metadata-ingestion/tests/integration/snowflake/common.py b/metadata-ingestion/tests/integration/snowflake/common.py index 5ef2eb420b8ed5..c4362368e28cd8 100644 --- a/metadata-ingestion/tests/integration/snowflake/common.py +++ b/metadata-ingestion/tests/integration/snowflake/common.py @@ -156,7 +156,7 @@ on basic_usage_counts.bucket_start_time = user_usage_counts.bucket_start_time and basic_usage_counts.object_name = user_usage_counts.object_name where - basic_usage_counts.object_domain in ('Table','External table','View','Materialized view') + basic_usage_counts.object_domain in ('Table','External table','View','Materialized 
view','Iceberg table') and basic_usage_counts.object_name is not null group by basic_usage_counts.object_name, diff --git a/metadata-ingestion/tests/performance/bigquery/test_bigquery_usage.py b/metadata-ingestion/tests/performance/bigquery/test_bigquery_usage.py index 92ad7b383dc1c0..9cb80ff02657bb 100644 --- a/metadata-ingestion/tests/performance/bigquery/test_bigquery_usage.py +++ b/metadata-ingestion/tests/performance/bigquery/test_bigquery_usage.py @@ -6,12 +6,12 @@ import humanfriendly import psutil -from datahub.emitter.mce_builder import make_dataset_urn from datahub.ingestion.source.bigquery_v2.bigquery_config import ( BigQueryUsageConfig, BigQueryV2Config, ) from datahub.ingestion.source.bigquery_v2.bigquery_report import BigQueryV2Report +from datahub.ingestion.source.bigquery_v2.common import BigQueryIdentifierBuilder from datahub.ingestion.source.bigquery_v2.usage import BigQueryUsageExtractor from datahub.sql_parsing.schema_resolver import SchemaResolver from datahub.utilities.perf_timer import PerfTimer @@ -49,9 +49,7 @@ def run_test(): config, report, schema_resolver=SchemaResolver(platform="bigquery"), - dataset_urn_builder=lambda ref: make_dataset_urn( - "bigquery", str(ref.table_identifier) - ), + identifiers=BigQueryIdentifierBuilder(config, report), ) report.set_ingestion_stage("All", "Event Generation") diff --git a/metadata-ingestion/tests/test_helpers/docker_helpers.py b/metadata-ingestion/tests/test_helpers/docker_helpers.py index bacb8d80b9e721..20aec975787e4e 100644 --- a/metadata-ingestion/tests/test_helpers/docker_helpers.py +++ b/metadata-ingestion/tests/test_helpers/docker_helpers.py @@ -1,52 +1,16 @@ -import contextlib import logging import os import subprocess -from typing import Callable, Iterator, List, Optional, Union import pytest -import pytest_docker.plugin -logger = logging.getLogger(__name__) - - -def is_responsive(container_name: str, port: int, hostname: Optional[str]) -> bool: - """A cheap way to figure out if a port is 
responsive on a container""" - if hostname: - cmd = f"docker exec {container_name} /bin/bash -c 'echo -n > /dev/tcp/{hostname}/{port}'" - else: - # use the hostname of the container - cmd = f"docker exec {container_name} /bin/bash -c 'c_host=`hostname`;echo -n > /dev/tcp/$c_host/{port}'" - ret = subprocess.run( - cmd, - shell=True, - ) - return ret.returncode == 0 +from datahub.testing.docker_utils import ( # noqa: F401 + docker_compose_runner, + is_responsive, + wait_for_port, +) - -def wait_for_port( - docker_services: pytest_docker.plugin.Services, - container_name: str, - container_port: int, - hostname: Optional[str] = None, - timeout: float = 30.0, - pause: float = 0.5, - checker: Optional[Callable[[], bool]] = None, -) -> None: - try: - docker_services.wait_until_responsive( - timeout=timeout, - pause=pause, - check=( - checker - if checker - else lambda: is_responsive(container_name, container_port, hostname) - ), - ) - logger.info(f"Container {container_name} is ready!") - finally: - # use check=True to raise an error if command gave bad exit code - subprocess.run(f"docker logs {container_name}", shell=True, check=True) +logger = logging.getLogger(__name__) @pytest.fixture(scope="session") @@ -58,28 +22,6 @@ def docker_compose_command(): return "docker compose" -@pytest.fixture(scope="module") -def docker_compose_runner( - docker_compose_command, docker_compose_project_name, docker_setup, docker_cleanup -): - @contextlib.contextmanager - def run( - compose_file_path: Union[str, List[str]], key: str, cleanup: bool = True - ) -> Iterator[pytest_docker.plugin.Services]: - with pytest_docker.plugin.get_docker_services( - docker_compose_command=docker_compose_command, - # We can remove the type ignore once this is merged: - # https://github.com/avast/pytest-docker/pull/108 - docker_compose_file=compose_file_path, # type: ignore - docker_compose_project_name=f"{docker_compose_project_name}-{key}", - docker_setup=docker_setup, - docker_cleanup=docker_cleanup if 
cleanup else [], - ) as docker_services: - yield docker_services - - return run - - def cleanup_image(image_name: str) -> None: assert ":" not in image_name, "image_name should not contain a tag" diff --git a/metadata-ingestion/tests/unit/s3/test_s3_source.py b/metadata-ingestion/tests/unit/s3/test_s3_source.py index 2eb386e39b0e55..f826cf0179e221 100644 --- a/metadata-ingestion/tests/unit/s3/test_s3_source.py +++ b/metadata-ingestion/tests/unit/s3/test_s3_source.py @@ -1,4 +1,6 @@ -from typing import List +from typing import List, Tuple + +import pytest from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.workunit import MetadataWorkUnit @@ -70,6 +72,75 @@ def test_path_spec(): assert path_spec.allowed(path) +def test_path_spec_with_double_star_ending(): + path_spec = PathSpec( + include="s3://my-bucket/{table}/**", + default_extension="csv", + allow_double_stars=True, + ) + path = "s3://my-bucket/my-folder/year=2022/month=10/day=11/my_csv.csv" + assert path_spec.allowed(path) + vars = path_spec.get_named_vars(path) + assert vars + assert vars["table"] == "my-folder" + + +@pytest.mark.parametrize( + "path_spec,path, expected", + [ + pytest.param( + "s3://my-bucket/{table}/**", + "s3://my-bucket/my-folder/year=2022/month=10/day=11/my_csv", + [("year", "2022"), ("month", "10"), ("day", "11")], + id="autodetect_partitions", + ), + pytest.param( + "s3://my-bucket/{table}/{partition_key[0]}={partition_value[0]}/{partition_key[1]}={partition_value[1]}/{partition_key[2]}={partition_value[2]}/*.csv", + "s3://my-bucket/my-folder/year=2022/month=10/day=11/my_csv.csv", + [("year", "2022"), ("month", "10"), ("day", "11")], + id="partition_key and value set", + ), + pytest.param( + "s3://my-bucket/{table}/{partition_key[0]}={partition[0]}/{partition_key[1]}={partition[1]}/{partition_key[2]}={partition[2]}/*.csv", + "s3://my-bucket/my-folder/year=2022/month=10/day=11/my_csv.csv", + [("year", "2022"), ("month", "10"), ("day", "11")], + 
id="partition_key and partition set", + ), + pytest.param( + "s3://my-bucket/{table}/{year}/{month}/{day}/*.csv", + "s3://my-bucket/my-folder/2022/10/11/my_csv.csv", + [("year", "2022"), ("month", "10"), ("day", "11")], + id="named partition keys", + ), + pytest.param( + "s3://my-bucket/{table}/{part[0]}/{part[1]}/{part[2]}/*.csv", + "s3://my-bucket/my-folder/2022/10/11/my_csv.csv", + [("part_0", "2022"), ("part_1", "10"), ("part_2", "11")], + id="indexed partition keys", + ), + pytest.param( + "s3://my-bucket/{table}/**", + "s3://my-bucket/my-folder/2022/10/11/my_csv.csv", + [("partition_0", "2022"), ("partition_1", "10"), ("partition_2", "11")], + id="partition autodetect with partition values only", + ), + pytest.param( + "s3://my-bucket/{table}/**", + "s3://my-bucket/my-folder/my_csv.csv", + None, + id="partition autodetect with non partitioned path", + ), + ], +) +def test_path_spec_partition_detection( + path_spec: str, path: str, expected: List[Tuple[str, str]] +) -> None: + ps = PathSpec(include=path_spec, default_extension="csv", allow_double_stars=True) + assert ps.allowed(path) + partitions = ps.get_partition_from_path(path) + assert partitions == expected + + def test_path_spec_dir_allowed(): path_spec = PathSpec( include="s3://my-bucket/my-folder/year=*/month=*/day=*/*.csv", diff --git a/metadata-ingestion/tests/unit/sagemaker/test_sagemaker_source.py b/metadata-ingestion/tests/unit/sagemaker/test_sagemaker_source.py index 582d16f5d2612e..995d176c213b24 100644 --- a/metadata-ingestion/tests/unit/sagemaker/test_sagemaker_source.py +++ b/metadata-ingestion/tests/unit/sagemaker/test_sagemaker_source.py @@ -1,3 +1,5 @@ +from unittest.mock import patch + from botocore.stub import Stubber from freezegun import freeze_time @@ -220,8 +222,17 @@ def test_sagemaker_ingest(tmp_path, pytestconfig): {"ModelName": "the-second-model"}, ) - mce_objects = [wu.metadata for wu in sagemaker_source_instance.get_workunits()] - write_metadata_file(tmp_path / 
"sagemaker_mces.json", mce_objects) + # Patch the client factory's get_client method to return the stubbed client for jobs + with patch.object( + sagemaker_source_instance.client_factory, + "get_client", + return_value=sagemaker_source_instance.sagemaker_client, + ): + # Run the test and generate the MCEs + mce_objects = [ + wu.metadata for wu in sagemaker_source_instance.get_workunits() + ] + write_metadata_file(tmp_path / "sagemaker_mces.json", mce_objects) # Verify the output. test_resources_dir = pytestconfig.rootpath / "tests/unit/sagemaker" diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_basic_usage.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_basic_usage.json new file mode 100644 index 00000000000000..6ea29520dc6883 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_basic_usage.json @@ -0,0 +1,51 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetUsageStatistics", + "aspect": { + "json": { + "timestampMillis": 1707177600000, + "eventGranularity": { + "unit": "DAY", + "multiple": 1 + }, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "uniqueUserCount": 2, + "totalSqlQueries": 6, + "topSqlQueries": [ + "SELECT\n *\nFROM foo", + "CREATE TABLE bar AS\nSELECT\n b + c AS c\nFROM foo" + ], + "userCounts": [ + { + "user": "urn:li:corpuser:user1", + "count": 5 + }, + { + "user": "urn:li:corpuser:user2", + "count": 1 + } + ], + "fieldCounts": [ + { + "fieldPath": "b", + "count": 6 + }, + { + "fieldPath": "c", + "count": 6 + }, + { + "fieldPath": "a", + "count": 5 + } + ] + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_create_table_query_mcps.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_create_table_query_mcps.json new 
file mode 100644 index 00000000000000..ddb6d931ac9a51 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_create_table_query_mcps.json @@ -0,0 +1,77 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,dev.dataset.foo,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1707182625000, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "CREATE", + "customProperties": { + "query_urn": "urn:li:query:f2e61c641cf14eae74147b6280ae40648516c4b7b58cfca6c4f7fb14ab255ce2" + }, + "lastUpdatedTimestamp": 1707182625000 + } + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:f2e61c641cf14eae74147b6280ae40648516c4b7b58cfca6c4f7fb14ab255ce2", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "CREATE OR REPLACE TABLE `dataset.foo` (\n date_utc TIMESTAMP,\n revenue INT64\n)", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 1707182625000, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1707182625000, + "actor": "urn:li:corpuser:_ingestion" + } + } + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:f2e61c641cf14eae74147b6280ae40648516c4b7b58cfca6c4f7fb14ab255ce2", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,dev.dataset.foo,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,dev.dataset.foo,PROD),date_utc)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,dev.dataset.foo,PROD),revenue)" + } + ] + } + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:f2e61c641cf14eae74147b6280ae40648516c4b7b58cfca6c4f7fb14ab255ce2", + "changeType": "UPSERT", + 
"aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_lineage_via_temp_table_disordered_add.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_lineage_via_temp_table_disordered_add.json new file mode 100644 index 00000000000000..51a732e7a24940 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_lineage_via_temp_table_disordered_add.json @@ -0,0 +1,79 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.derived_from_foo,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1707182625000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:composite_39f4adf89c8ad4d6d307b628c82d8260e1c5cd7eb6fb3a8cbb437421f970c16f" + } + ] + } + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:composite_39f4adf89c8ad4d6d307b628c82d8260e1c5cd7eb6fb3a8cbb437421f970c16f", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "CREATE TEMPORARY TABLE foo AS\nSELECT\n a,\n b + c AS c\nFROM bar;\n\nCREATE TABLE derived_from_foo AS\nSELECT\n *\nFROM foo", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1707182625000, + "actor": "urn:li:corpuser:_ingestion" + } + } + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:composite_39f4adf89c8ad4d6d307b628c82d8260e1c5cd7eb6fb3a8cbb437421f970c16f", + "changeType": "UPSERT", + "aspectName": 
"querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.derived_from_foo,PROD)" + } + ] + } + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:composite_39f4adf89c8ad4d6d307b628c82d8260e1c5cd7eb6fb3a8cbb437421f970c16f", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:redshift" + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sql_aggregator.py b/metadata-ingestion/tests/unit/sql_parsing/test_sql_aggregator.py index 5c6abf4c9371d6..2e15dabb10d114 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sql_aggregator.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sql_aggregator.py @@ -4,6 +4,9 @@ import pytest from freezegun import freeze_time +from datahub.configuration.datetimes import parse_user_datetime +from datahub.configuration.time_window_config import BucketDuration, get_time_bucket +from datahub.ingestion.source.usage.usage_common import BaseUsageConfig from datahub.metadata.urns import CorpUserUrn, DatasetUrn from datahub.sql_parsing.sql_parsing_aggregator import ( KnownQueryLineageInfo, @@ -20,7 +23,7 @@ from tests.test_helpers.click_helpers import run_datahub_cmd RESOURCE_DIR = pathlib.Path(__file__).parent / "aggregator_goldens" -FROZEN_TIME = "2024-02-06 01:23:45" +FROZEN_TIME = "2024-02-06T01:23:45Z" def _ts(ts: int) -> datetime: @@ -499,3 +502,102 @@ def test_table_rename(pytestconfig: pytest.Config) -> None: outputs=mcps, golden_path=RESOURCE_DIR / "test_table_rename.json", ) + + +@freeze_time(FROZEN_TIME) +def test_create_table_query_mcps(pytestconfig: pytest.Config) -> None: + aggregator = SqlParsingAggregator( + platform="bigquery", + generate_lineage=True, + generate_usage_statistics=False, + generate_operations=True, + ) + + 
aggregator.add_observed_query( + query="create or replace table `dataset.foo` (date_utc timestamp, revenue int);", + default_db="dev", + default_schema="public", + query_timestamp=datetime.now(), + ) + + mcps = list(aggregator.gen_metadata()) + + mce_helpers.check_goldens_stream( + pytestconfig, + outputs=mcps, + golden_path=RESOURCE_DIR / "test_create_table_query_mcps.json", + ) + + +@freeze_time(FROZEN_TIME) +def test_lineage_via_temp_table_disordered_add(pytestconfig: pytest.Config) -> None: + aggregator = SqlParsingAggregator( + platform="redshift", + generate_lineage=True, + generate_usage_statistics=False, + generate_operations=False, + ) + + aggregator.add_observed_query( + query="create table derived_from_foo as select * from foo", + default_db="dev", + default_schema="public", + ) + aggregator.add_observed_query( + query="create temp table foo as select a, b+c as c from bar", + default_db="dev", + default_schema="public", + ) + + mcps = list(aggregator.gen_metadata()) + + mce_helpers.check_goldens_stream( + pytestconfig, + outputs=mcps, + golden_path=RESOURCE_DIR / "test_lineage_via_temp_table_disordered_add.json", + ) + + +@freeze_time(FROZEN_TIME) +def test_basic_usage(pytestconfig: pytest.Config) -> None: + + frozen_timestamp = parse_user_datetime(FROZEN_TIME) + aggregator = SqlParsingAggregator( + platform="redshift", + generate_lineage=False, + generate_usage_statistics=True, + generate_operations=False, + usage_config=BaseUsageConfig( + start_time=get_time_bucket(frozen_timestamp, BucketDuration.DAY), + end_time=frozen_timestamp, + ), + ) + + aggregator._schema_resolver.add_raw_schema_info( + DatasetUrn("redshift", "dev.public.foo").urn(), + {"a": "int", "b": "int", "c": "int"}, + ) + + aggregator.add_observed_query( + query="select * from foo", + default_db="dev", + default_schema="public", + usage_multiplier=5, + query_timestamp=frozen_timestamp, + user=CorpUserUrn("user1"), + ) + aggregator.add_observed_query( + query="create table bar as select 
b+c as c from foo", + default_db="dev", + default_schema="public", + query_timestamp=frozen_timestamp, + user=CorpUserUrn("user2"), + ) + + mcps = list(aggregator.gen_metadata()) + + mce_helpers.check_goldens_stream( + pytestconfig, + outputs=mcps, + golden_path=RESOURCE_DIR / "test_basic_usage.json", + ) diff --git a/metadata-ingestion/tests/unit/test_bigquery_lineage.py b/metadata-ingestion/tests/unit/test_bigquery_lineage.py index 5d8c040b4123b5..6bd5cc4d3226e2 100644 --- a/metadata-ingestion/tests/unit/test_bigquery_lineage.py +++ b/metadata-ingestion/tests/unit/test_bigquery_lineage.py @@ -3,13 +3,13 @@ import pytest -import datahub.emitter.mce_builder as builder from datahub.ingestion.source.bigquery_v2.bigquery_audit import ( BigQueryTableRef, QueryEvent, ) from datahub.ingestion.source.bigquery_v2.bigquery_config import BigQueryV2Config from datahub.ingestion.source.bigquery_v2.bigquery_report import BigQueryV2Report +from datahub.ingestion.source.bigquery_v2.common import BigQueryIdentifierBuilder from datahub.ingestion.source.bigquery_v2.lineage import ( BigqueryLineageExtractor, LineageEdge, @@ -83,7 +83,7 @@ def test_lineage_with_timestamps(lineage_entries: List[QueryEvent]) -> None: config = BigQueryV2Config() report = BigQueryV2Report() extractor: BigqueryLineageExtractor = BigqueryLineageExtractor( - config, report, lambda x: builder.make_dataset_urn("bigquery", str(x)) + config, report, BigQueryIdentifierBuilder(config, report) ) bq_table = BigQueryTableRef.from_string_name( @@ -108,7 +108,7 @@ def test_column_level_lineage(lineage_entries: List[QueryEvent]) -> None: config = BigQueryV2Config(extract_column_lineage=True, incremental_lineage=False) report = BigQueryV2Report() extractor: BigqueryLineageExtractor = BigqueryLineageExtractor( - config, report, lambda x: builder.make_dataset_urn("bigquery", str(x)) + config, report, BigQueryIdentifierBuilder(config, report) ) bq_table = BigQueryTableRef.from_string_name( diff --git 
a/metadata-ingestion/tests/unit/test_bigquery_source.py b/metadata-ingestion/tests/unit/test_bigquery_source.py index 746cf9b0acfc3e..8ec19e5bb9e56f 100644 --- a/metadata-ingestion/tests/unit/test_bigquery_source.py +++ b/metadata-ingestion/tests/unit/test_bigquery_source.py @@ -31,6 +31,7 @@ BigqueryTable, BigqueryTableSnapshot, BigqueryView, + get_projects, ) from datahub.ingestion.source.bigquery_v2.bigquery_schema_gen import ( BigQuerySchemaGenerator, @@ -170,7 +171,11 @@ def test_bigquery_uri_with_credential(): @patch.object(BigQueryV2Config, "get_bigquery_client") -def test_get_projects_with_project_ids(get_bq_client_mock): +@patch.object(BigQueryV2Config, "get_projects_client") +def test_get_projects_with_project_ids( + get_projects_client, + get_bq_client_mock, +): client_mock = MagicMock() get_bq_client_mock.return_value = client_mock config = BigQueryV2Config.parse_obj( @@ -179,7 +184,11 @@ def test_get_projects_with_project_ids(get_bq_client_mock): } ) source = BigqueryV2Source(config=config, ctx=PipelineContext(run_id="test1")) - assert source._get_projects() == [ + assert get_projects( + source.bq_schema_extractor.schema_api, + source.report, + source.filters, + ) == [ BigqueryProject("test-1", "test-1"), BigqueryProject("test-2", "test-2"), ] @@ -189,7 +198,11 @@ def test_get_projects_with_project_ids(get_bq_client_mock): {"project_ids": ["test-1", "test-2"], "project_id": "test-3"} ) source = BigqueryV2Source(config=config, ctx=PipelineContext(run_id="test2")) - assert source._get_projects() == [ + assert get_projects( + source.bq_schema_extractor.schema_api, + source.report, + source.filters, + ) == [ BigqueryProject("test-1", "test-1"), BigqueryProject("test-2", "test-2"), ] @@ -197,8 +210,10 @@ def test_get_projects_with_project_ids(get_bq_client_mock): @patch.object(BigQueryV2Config, "get_bigquery_client") +@patch.object(BigQueryV2Config, "get_projects_client") def test_get_projects_with_project_ids_overrides_project_id_pattern( - 
get_bq_client_mock, + get_projects_client, + get_bigquery_client, ): config = BigQueryV2Config.parse_obj( { @@ -207,7 +222,11 @@ def test_get_projects_with_project_ids_overrides_project_id_pattern( } ) source = BigqueryV2Source(config=config, ctx=PipelineContext(run_id="test")) - projects = source._get_projects() + projects = get_projects( + source.bq_schema_extractor.schema_api, + source.report, + source.filters, + ) assert projects == [ BigqueryProject(id="test-project", name="test-project"), BigqueryProject(id="test-project-2", name="test-project-2"), @@ -220,13 +239,19 @@ def test_platform_instance_config_always_none(): ) assert config.platform_instance is None - config = BigQueryV2Config(platform_instance="something", project_id="project_id") - assert config.project_id == "project_id" + config = BigQueryV2Config.parse_obj( + dict(platform_instance="something", project_id="project_id") + ) + assert config.project_ids == ["project_id"] assert config.platform_instance is None @patch.object(BigQueryV2Config, "get_bigquery_client") -def test_get_dataplatform_instance_aspect_returns_project_id(get_bq_client_mock): +@patch.object(BigQueryV2Config, "get_projects_client") +def test_get_dataplatform_instance_aspect_returns_project_id( + get_projects_client, + get_bq_client_mock, +): project_id = "project_id" expected_instance = ( f"urn:li:dataPlatformInstance:(urn:li:dataPlatform:bigquery,{project_id})" @@ -247,7 +272,11 @@ def test_get_dataplatform_instance_aspect_returns_project_id(get_bq_client_mock) @patch.object(BigQueryV2Config, "get_bigquery_client") -def test_get_dataplatform_instance_default_no_instance(get_bq_client_mock): +@patch.object(BigQueryV2Config, "get_projects_client") +def test_get_dataplatform_instance_default_no_instance( + get_projects_client, + get_bq_client_mock, +): config = BigQueryV2Config.parse_obj({}) source = BigqueryV2Source(config=config, ctx=PipelineContext(run_id="test")) schema_gen = source.bq_schema_extractor @@ -263,21 +292,30 @@ 
def test_get_dataplatform_instance_default_no_instance(get_bq_client_mock): @patch.object(BigQueryV2Config, "get_bigquery_client") -def test_get_projects_with_single_project_id(get_bq_client_mock): +@patch.object(BigQueryV2Config, "get_projects_client") +def test_get_projects_with_single_project_id( + get_projects_client, + get_bq_client_mock, +): client_mock = MagicMock() get_bq_client_mock.return_value = client_mock config = BigQueryV2Config.parse_obj({"project_id": "test-3"}) source = BigqueryV2Source(config=config, ctx=PipelineContext(run_id="test1")) - assert source._get_projects() == [ + assert get_projects( + source.bq_schema_extractor.schema_api, + source.report, + source.filters, + ) == [ BigqueryProject("test-3", "test-3"), ] assert client_mock.list_projects.call_count == 0 @patch.object(BigQueryV2Config, "get_bigquery_client") -def test_get_projects_by_list(get_bq_client_mock): +@patch.object(BigQueryV2Config, "get_projects_client") +def test_get_projects_by_list(get_projects_client, get_bigquery_client): client_mock = MagicMock() - get_bq_client_mock.return_value = client_mock + get_bigquery_client.return_value = client_mock first_page = MagicMock() first_page.__iter__.return_value = iter( @@ -296,11 +334,16 @@ def test_get_projects_by_list(get_bq_client_mock): ] ) second_page.next_page_token = None + client_mock.list_projects.side_effect = [first_page, second_page] config = BigQueryV2Config.parse_obj({}) source = BigqueryV2Source(config=config, ctx=PipelineContext(run_id="test1")) - assert source._get_projects() == [ + assert get_projects( + source.bq_schema_extractor.schema_api, + source.report, + source.filters, + ) == [ BigqueryProject("test-1", "one"), BigqueryProject("test-2", "two"), BigqueryProject("test-3", "three"), @@ -311,7 +354,10 @@ def test_get_projects_by_list(get_bq_client_mock): @patch.object(BigQuerySchemaApi, "get_projects") @patch.object(BigQueryV2Config, "get_bigquery_client") -def 
test_get_projects_filter_by_pattern(get_bq_client_mock, get_projects_mock): +@patch.object(BigQueryV2Config, "get_projects_client") +def test_get_projects_filter_by_pattern( + get_projects_client, get_bq_client_mock, get_projects_mock +): get_projects_mock.return_value = [ BigqueryProject("test-project", "Test Project"), BigqueryProject("test-project-2", "Test Project 2"), @@ -321,7 +367,11 @@ def test_get_projects_filter_by_pattern(get_bq_client_mock, get_projects_mock): {"project_id_pattern": {"deny": ["^test-project$"]}} ) source = BigqueryV2Source(config=config, ctx=PipelineContext(run_id="test")) - projects = source._get_projects() + projects = get_projects( + source.bq_schema_extractor.schema_api, + source.report, + source.filters, + ) assert projects == [ BigqueryProject(id="test-project-2", name="Test Project 2"), ] @@ -329,20 +379,29 @@ def test_get_projects_filter_by_pattern(get_bq_client_mock, get_projects_mock): @patch.object(BigQuerySchemaApi, "get_projects") @patch.object(BigQueryV2Config, "get_bigquery_client") -def test_get_projects_list_empty(get_bq_client_mock, get_projects_mock): +@patch.object(BigQueryV2Config, "get_projects_client") +def test_get_projects_list_empty( + get_projects_client, get_bq_client_mock, get_projects_mock +): get_projects_mock.return_value = [] config = BigQueryV2Config.parse_obj( {"project_id_pattern": {"deny": ["^test-project$"]}} ) source = BigqueryV2Source(config=config, ctx=PipelineContext(run_id="test")) - projects = source._get_projects() + projects = get_projects( + source.bq_schema_extractor.schema_api, + source.report, + source.filters, + ) assert len(source.report.failures) == 1 assert projects == [] @patch.object(BigQueryV2Config, "get_bigquery_client") +@patch.object(BigQueryV2Config, "get_projects_client") def test_get_projects_list_failure( + get_projects_client: MagicMock, get_bq_client_mock: MagicMock, caplog: pytest.LogCaptureFixture, ) -> None: @@ -357,7 +416,11 @@ def test_get_projects_list_failure( 
source = BigqueryV2Source(config=config, ctx=PipelineContext(run_id="test")) caplog.clear() with caplog.at_level(logging.ERROR): - projects = source._get_projects() + projects = get_projects( + source.bq_schema_extractor.schema_api, + source.report, + source.filters, + ) assert len(caplog.records) == 2 assert error_str in caplog.records[0].msg assert len(source.report.failures) == 1 @@ -366,14 +429,21 @@ def test_get_projects_list_failure( @patch.object(BigQuerySchemaApi, "get_projects") @patch.object(BigQueryV2Config, "get_bigquery_client") -def test_get_projects_list_fully_filtered(get_projects_mock, get_bq_client_mock): +@patch.object(BigQueryV2Config, "get_projects_client") +def test_get_projects_list_fully_filtered( + get_projects_mock, get_bq_client_mock, get_projects_client +): get_projects_mock.return_value = [BigqueryProject("test-project", "Test Project")] config = BigQueryV2Config.parse_obj( {"project_id_pattern": {"deny": ["^test-project$"]}} ) source = BigqueryV2Source(config=config, ctx=PipelineContext(run_id="test")) - projects = source._get_projects() + projects = get_projects( + source.bq_schema_extractor.schema_api, + source.report, + source.filters, + ) assert len(source.report.failures) == 0 assert projects == [] @@ -399,7 +469,10 @@ def bigquery_table() -> BigqueryTable: @patch.object(BigQueryV2Config, "get_bigquery_client") -def test_gen_table_dataset_workunits(get_bq_client_mock, bigquery_table): +@patch.object(BigQueryV2Config, "get_projects_client") +def test_gen_table_dataset_workunits( + get_projects_client, get_bq_client_mock, bigquery_table +): project_id = "test-project" dataset_name = "test-dataset" config = BigQueryV2Config.parse_obj( @@ -471,7 +544,8 @@ def test_gen_table_dataset_workunits(get_bq_client_mock, bigquery_table): @patch.object(BigQueryV2Config, "get_bigquery_client") -def test_simple_upstream_table_generation(get_bq_client_mock): +@patch.object(BigQueryV2Config, "get_projects_client") +def 
test_simple_upstream_table_generation(get_bq_client_mock, get_projects_client): a: BigQueryTableRef = BigQueryTableRef( BigqueryTableIdentifier( project_id="test-project", dataset="test-dataset", table="a" @@ -503,8 +577,10 @@ def test_simple_upstream_table_generation(get_bq_client_mock): @patch.object(BigQueryV2Config, "get_bigquery_client") +@patch.object(BigQueryV2Config, "get_projects_client") def test_upstream_table_generation_with_temporary_table_without_temp_upstream( get_bq_client_mock, + get_projects_client, ): a: BigQueryTableRef = BigQueryTableRef( BigqueryTableIdentifier( @@ -536,7 +612,10 @@ def test_upstream_table_generation_with_temporary_table_without_temp_upstream( @patch.object(BigQueryV2Config, "get_bigquery_client") -def test_upstream_table_column_lineage_with_temp_table(get_bq_client_mock): +@patch.object(BigQueryV2Config, "get_projects_client") +def test_upstream_table_column_lineage_with_temp_table( + get_bq_client_mock, get_projects_client +): from datahub.ingestion.api.common import PipelineContext a: BigQueryTableRef = BigQueryTableRef( @@ -611,8 +690,9 @@ def test_upstream_table_column_lineage_with_temp_table(get_bq_client_mock): @patch.object(BigQueryV2Config, "get_bigquery_client") +@patch.object(BigQueryV2Config, "get_projects_client") def test_upstream_table_generation_with_temporary_table_with_multiple_temp_upstream( - get_bq_client_mock, + get_bq_client_mock, get_projects_client ): a: BigQueryTableRef = BigQueryTableRef( BigqueryTableIdentifier( @@ -675,7 +755,10 @@ def test_upstream_table_generation_with_temporary_table_with_multiple_temp_upstr @patch.object(BigQuerySchemaApi, "get_tables_for_dataset") @patch.object(BigQueryV2Config, "get_bigquery_client") -def test_table_processing_logic(get_bq_client_mock, data_dictionary_mock): +@patch.object(BigQueryV2Config, "get_projects_client") +def test_table_processing_logic( + get_projects_client, get_bq_client_mock, data_dictionary_mock +): client_mock = MagicMock() 
get_bq_client_mock.return_value = client_mock config = BigQueryV2Config.parse_obj( @@ -747,8 +830,9 @@ def test_table_processing_logic(get_bq_client_mock, data_dictionary_mock): @patch.object(BigQuerySchemaApi, "get_tables_for_dataset") @patch.object(BigQueryV2Config, "get_bigquery_client") +@patch.object(BigQueryV2Config, "get_projects_client") def test_table_processing_logic_date_named_tables( - get_bq_client_mock, data_dictionary_mock + get_projects_client, get_bq_client_mock, data_dictionary_mock ): client_mock = MagicMock() get_bq_client_mock.return_value = client_mock @@ -859,8 +943,10 @@ def bigquery_view_2() -> BigqueryView: @patch.object(BigQuerySchemaApi, "get_query_result") @patch.object(BigQueryV2Config, "get_bigquery_client") +@patch.object(BigQueryV2Config, "get_projects_client") def test_get_views_for_dataset( get_bq_client_mock: Mock, + get_projects_client: MagicMock, query_mock: Mock, bigquery_view_1: BigqueryView, bigquery_view_2: BigqueryView, @@ -889,7 +975,9 @@ def test_get_views_for_dataset( ) query_mock.return_value = [row1, row2] bigquery_data_dictionary = BigQuerySchemaApi( - BigQueryV2Report().schema_api_perf, client_mock + report=BigQueryV2Report().schema_api_perf, + client=client_mock, + projects_client=MagicMock(), ) views = bigquery_data_dictionary.get_views_for_dataset( @@ -905,8 +993,9 @@ def test_get_views_for_dataset( BigQuerySchemaGenerator, "gen_dataset_workunits", lambda *args, **kwargs: [] ) @patch.object(BigQueryV2Config, "get_bigquery_client") +@patch.object(BigQueryV2Config, "get_projects_client") def test_gen_view_dataset_workunits( - get_bq_client_mock, bigquery_view_1, bigquery_view_2 + get_projects_client, get_bq_client_mock, bigquery_view_1, bigquery_view_2 ): project_id = "test-project" dataset_name = "test-dataset" @@ -963,7 +1052,9 @@ def bigquery_snapshot() -> BigqueryTableSnapshot: @patch.object(BigQuerySchemaApi, "get_query_result") @patch.object(BigQueryV2Config, "get_bigquery_client") 
+@patch.object(BigQueryV2Config, "get_projects_client") def test_get_snapshots_for_dataset( + get_projects_client: MagicMock, get_bq_client_mock: Mock, query_mock: Mock, bigquery_snapshot: BigqueryTableSnapshot, @@ -988,7 +1079,9 @@ def test_get_snapshots_for_dataset( ) query_mock.return_value = [row1] bigquery_data_dictionary = BigQuerySchemaApi( - BigQueryV2Report().schema_api_perf, client_mock + report=BigQueryV2Report().schema_api_perf, + client=client_mock, + projects_client=MagicMock(), ) snapshots = bigquery_data_dictionary.get_snapshots_for_dataset( @@ -1001,7 +1094,10 @@ def test_get_snapshots_for_dataset( @patch.object(BigQueryV2Config, "get_bigquery_client") -def test_gen_snapshot_dataset_workunits(get_bq_client_mock, bigquery_snapshot): +@patch.object(BigQueryV2Config, "get_projects_client") +def test_gen_snapshot_dataset_workunits( + get_bq_client_mock, get_projects_client, bigquery_snapshot +): project_id = "test-project" dataset_name = "test-dataset" config = BigQueryV2Config.parse_obj( @@ -1140,7 +1236,9 @@ def test_default_config_for_excluding_projects_and_datasets(): @patch.object(BigQueryConnectionConfig, "get_bigquery_client", new=lambda self: None) @patch.object(BigQuerySchemaApi, "get_datasets_for_project_id") +@patch.object(BigQueryV2Config, "get_projects_client") def test_excluding_empty_projects_from_ingestion( + get_projects_client, get_datasets_for_project_id_mock, ): project_id_with_datasets = "project-id-with-datasets" @@ -1173,3 +1271,62 @@ def get_datasets_for_project_id_side_effect( config = BigQueryV2Config.parse_obj({**base_config, "exclude_empty_projects": True}) source = BigqueryV2Source(config=config, ctx=PipelineContext(run_id="test-2")) assert len({wu.metadata.entityUrn for wu in source.get_workunits()}) == 1 # type: ignore + + +def test_bigquery_config_deprecated_schema_pattern(): + base_config = { + "include_usage_statistics": False, + "include_table_lineage": False, + } + + config = BigQueryV2Config.parse_obj(base_config) + 
assert config.dataset_pattern == AllowDenyPattern(allow=[".*"]) # default + + config_with_schema_pattern = { + **base_config, + "schema_pattern": AllowDenyPattern(deny=[".*"]), + } + config = BigQueryV2Config.parse_obj(config_with_schema_pattern) + assert config.dataset_pattern == AllowDenyPattern(deny=[".*"]) # schema_pattern + + config_with_dataset_pattern = { + **base_config, + "dataset_pattern": AllowDenyPattern(deny=["temp.*"]), + } + config = BigQueryV2Config.parse_obj(config_with_dataset_pattern) + assert config.dataset_pattern == AllowDenyPattern( + deny=["temp.*"] + ) # dataset_pattern + + +@patch.object(BigQueryV2Config, "get_bigquery_client") +@patch.object(BigQueryV2Config, "get_projects_client") +def test_get_projects_with_project_labels( + get_projects_client, + get_bq_client_mock, +): + client_mock = MagicMock() + + get_projects_client.return_value = client_mock + + client_mock.search_projects.return_value = [ + SimpleNamespace(project_id="dev", display_name="dev_project"), + SimpleNamespace(project_id="qa", display_name="qa_project"), + ] + + config = BigQueryV2Config.parse_obj( + { + "project_labels": ["environment:dev", "environment:qa"], + } + ) + + source = BigqueryV2Source(config=config, ctx=PipelineContext(run_id="test1")) + + assert get_projects( + source.bq_schema_extractor.schema_api, + source.report, + source.filters, + ) == [ + BigqueryProject("dev", "dev_project"), + BigqueryProject("qa", "qa_project"), + ] diff --git a/metadata-ingestion/tests/unit/test_bigquery_usage.py b/metadata-ingestion/tests/unit/test_bigquery_usage.py index f476e62dd01704..7ff83bff4a72a5 100644 --- a/metadata-ingestion/tests/unit/test_bigquery_usage.py +++ b/metadata-ingestion/tests/unit/test_bigquery_usage.py @@ -8,7 +8,6 @@ from freezegun import freeze_time from datahub.configuration.time_window_config import BucketDuration -from datahub.emitter.mce_builder import make_dataset_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from 
datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.bigquery_v2.bigquery_audit import ( @@ -23,6 +22,7 @@ BigQueryV2Config, ) from datahub.ingestion.source.bigquery_v2.bigquery_report import BigQueryV2Report +from datahub.ingestion.source.bigquery_v2.common import BigQueryIdentifierBuilder from datahub.ingestion.source.bigquery_v2.usage import ( OPERATION_STATEMENT_TYPES, BigQueryUsageExtractor, @@ -162,21 +162,23 @@ def query_view_1_and_table_1(timestamp: datetime = TS_1, actor: str = ACTOR_1) - def make_usage_workunit( - table: Table, dataset_usage_statistics: DatasetUsageStatisticsClass + table: Table, + dataset_usage_statistics: DatasetUsageStatisticsClass, + identifiers: BigQueryIdentifierBuilder, ) -> MetadataWorkUnit: resource = BigQueryTableRef.from_string_name(TABLE_REFS[table.name]) return MetadataChangeProposalWrapper( - entityUrn=resource.to_urn("PROD"), + entityUrn=identifiers.gen_dataset_urn_from_raw_ref(resource), aspectName=dataset_usage_statistics.get_aspect_name(), aspect=dataset_usage_statistics, ).as_workunit() def make_operational_workunit( - resource: str, operation: OperationClass + resource_urn: str, operation: OperationClass ) -> MetadataWorkUnit: return MetadataChangeProposalWrapper( - entityUrn=BigQueryTableRef.from_string_name(resource).to_urn("PROD"), + entityUrn=resource_urn, aspectName=operation.get_aspect_name(), aspect=operation, ).as_workunit() @@ -204,14 +206,15 @@ def usage_extractor(config: BigQueryV2Config) -> BigQueryUsageExtractor: config, report, schema_resolver=SchemaResolver(platform="bigquery"), - dataset_urn_builder=lambda ref: make_dataset_urn( - "bigquery", str(ref.table_identifier) - ), + identifiers=BigQueryIdentifierBuilder(config, report), ) def make_zero_usage_workunit( - table: Table, time: datetime, bucket_duration: BucketDuration = BucketDuration.DAY + table: Table, + time: datetime, + identifiers: BigQueryIdentifierBuilder, + bucket_duration: BucketDuration = 
BucketDuration.DAY, ) -> MetadataWorkUnit: return make_usage_workunit( table=table, @@ -224,6 +227,7 @@ def make_zero_usage_workunit( userCounts=[], fieldCounts=[], ), + identifiers=identifiers, ) @@ -294,9 +298,10 @@ def test_usage_counts_single_bucket_resource_project( ), ], ), + identifiers=usage_extractor.identifiers, ), - make_zero_usage_workunit(TABLE_2, TS_1), - make_zero_usage_workunit(VIEW_1, TS_1), + make_zero_usage_workunit(TABLE_2, TS_1, usage_extractor.identifiers), + make_zero_usage_workunit(VIEW_1, TS_1, usage_extractor.identifiers), ] compare_workunits(workunits, expected) @@ -377,6 +382,7 @@ def test_usage_counts_multiple_buckets_and_resources_view_usage( ), ], ), + identifiers=usage_extractor.identifiers, ), make_usage_workunit( table=VIEW_1, @@ -404,6 +410,7 @@ def test_usage_counts_multiple_buckets_and_resources_view_usage( ], fieldCounts=[], ), + identifiers=usage_extractor.identifiers, ), make_usage_workunit( table=TABLE_2, @@ -435,6 +442,7 @@ def test_usage_counts_multiple_buckets_and_resources_view_usage( ), ], ), + identifiers=usage_extractor.identifiers, ), # TS 2 make_usage_workunit( @@ -479,6 +487,7 @@ def test_usage_counts_multiple_buckets_and_resources_view_usage( ), ], ), + identifiers=usage_extractor.identifiers, ), make_usage_workunit( table=VIEW_1, @@ -499,6 +508,7 @@ def test_usage_counts_multiple_buckets_and_resources_view_usage( ], fieldCounts=[], ), + identifiers=usage_extractor.identifiers, ), make_usage_workunit( table=TABLE_2, @@ -532,6 +542,7 @@ def test_usage_counts_multiple_buckets_and_resources_view_usage( ), ], ), + identifiers=usage_extractor.identifiers, ), ] compare_workunits(workunits, expected) @@ -622,6 +633,7 @@ def test_usage_counts_multiple_buckets_and_resources_no_view_usage( ), ], ), + identifiers=usage_extractor.identifiers, ), make_usage_workunit( table=TABLE_2, @@ -664,6 +676,7 @@ def test_usage_counts_multiple_buckets_and_resources_no_view_usage( ), ], ), + identifiers=usage_extractor.identifiers, ), # TS 
2 make_usage_workunit( @@ -713,6 +726,7 @@ def test_usage_counts_multiple_buckets_and_resources_no_view_usage( ), ], ), + identifiers=usage_extractor.identifiers, ), make_usage_workunit( table=TABLE_2, @@ -764,8 +778,9 @@ def test_usage_counts_multiple_buckets_and_resources_no_view_usage( ), ], ), + identifiers=usage_extractor.identifiers, ), - make_zero_usage_workunit(VIEW_1, TS_1), + make_zero_usage_workunit(VIEW_1, TS_1, usage_extractor.identifiers), # TS_2 not included as only 1 minute of it was ingested ] compare_workunits(workunits, expected) @@ -793,7 +808,7 @@ def test_usage_counts_no_query_event( workunits = usage_extractor._get_workunits_internal([event], [str(ref)]) expected = [ MetadataChangeProposalWrapper( - entityUrn=ref.to_urn("PROD"), + entityUrn=usage_extractor.identifiers.gen_dataset_urn_from_raw_ref(ref), aspect=DatasetUsageStatisticsClass( timestampMillis=int(TS_1.timestamp() * 1000), eventGranularity=TimeWindowSizeClass( @@ -872,6 +887,7 @@ def test_usage_counts_no_columns( ], fieldCounts=[], ), + identifiers=usage_extractor.identifiers, ) ] compare_workunits(workunits, expected) @@ -991,6 +1007,7 @@ def test_usage_counts_no_columns_and_top_n_limit_hit( ], fieldCounts=[], ), + identifiers=usage_extractor.identifiers, ) ] compare_workunits(workunits, expected) @@ -1036,7 +1053,11 @@ def test_operational_stats( workunits = usage_extractor._get_workunits_internal(events, table_refs.values()) expected = [ make_operational_workunit( - table_refs[query.object_modified.name], + usage_extractor.identifiers.gen_dataset_urn_from_raw_ref( + BigQueryTableRef.from_string_name( + table_refs[query.object_modified.name] + ) + ), OperationClass( timestampMillis=int(FROZEN_TIME.timestamp() * 1000), lastUpdatedTimestamp=int(query.timestamp.timestamp() * 1000), @@ -1053,18 +1074,20 @@ def test_operational_stats( ), affectedDatasets=list( dict.fromkeys( # Preserve order - BigQueryTableRef.from_string_name( - table_refs[field.table.name] - ).to_urn("PROD") + 
usage_extractor.identifiers.gen_dataset_urn_from_raw_ref( + BigQueryTableRef.from_string_name( + table_refs[field.table.name] + ) + ) for field in query.fields_accessed if not field.table.is_view() ) ) + list( dict.fromkeys( # Preserve order - BigQueryTableRef.from_string_name( - table_refs[parent.name] - ).to_urn("PROD") + usage_extractor.identifiers.gen_dataset_urn_from_raw_ref( + BigQueryTableRef.from_string_name(table_refs[parent.name]) + ) for field in query.fields_accessed if field.table.is_view() for parent in field.table.upstreams diff --git a/metadata-ingestion/tests/unit/test_bigqueryv2_usage_source.py b/metadata-ingestion/tests/unit/test_bigqueryv2_usage_source.py index 21787af1b0cb9a..63de742b201a97 100644 --- a/metadata-ingestion/tests/unit/test_bigqueryv2_usage_source.py +++ b/metadata-ingestion/tests/unit/test_bigqueryv2_usage_source.py @@ -12,6 +12,7 @@ unquote_and_decode_unicode_escape_seq, ) from datahub.ingestion.source.bigquery_v2.bigquery_report import BigQueryV2Report +from datahub.ingestion.source.bigquery_v2.common import BigQueryIdentifierBuilder from datahub.ingestion.source.bigquery_v2.usage import BigQueryUsageExtractor from datahub.sql_parsing.schema_resolver import SchemaResolver @@ -117,11 +118,12 @@ def test_bigqueryv2_filters(): corrected_start_time = config.start_time - config.max_query_duration corrected_end_time = config.end_time + config.max_query_duration + report = BigQueryV2Report() filter: str = BigQueryUsageExtractor( config, - BigQueryV2Report(), + report, schema_resolver=SchemaResolver(platform="bigquery"), - dataset_urn_builder=lambda x: "", + identifiers=BigQueryIdentifierBuilder(config, report), )._generate_filter(corrected_start_time, corrected_end_time) assert filter == expected_filter diff --git a/metadata-ingestion/tests/unit/test_dbt_source.py b/metadata-ingestion/tests/unit/test_dbt_source.py index 01d7a4809b01b8..90ff78b16f652b 100644 --- a/metadata-ingestion/tests/unit/test_dbt_source.py +++ 
b/metadata-ingestion/tests/unit/test_dbt_source.py @@ -247,7 +247,6 @@ def test_dbt_config_prefer_sql_parser_lineage(): "catalog_path": "dummy_path", "target_platform": "dummy_platform", "skip_sources_in_lineage": True, - "entities_enabled": {"sources": "NO"}, "prefer_sql_parser_lineage": True, } config = DBTCoreConfig.parse_obj(config_dict) diff --git a/metadata-ingestion/tests/unit/test_mapping.py b/metadata-ingestion/tests/unit/test_mapping.py index 0e176710bb4f39..b4168af3029d04 100644 --- a/metadata-ingestion/tests/unit/test_mapping.py +++ b/metadata-ingestion/tests/unit/test_mapping.py @@ -186,7 +186,7 @@ def test_operation_processor_advanced_matching_owners(): def test_operation_processor_ownership_category(): raw_props = { "user_owner": "@test_user", - "business_owner": "alice", + "business_owner": "alice,urn:li:corpGroup:biz-data-team", "architect": "bob", } processor = OperationProcessor( @@ -222,18 +222,24 @@ def test_operation_processor_ownership_category(): assert "add_owner" in aspect_map ownership_aspect: OwnershipClass = aspect_map["add_owner"] - assert len(ownership_aspect.owners) == 3 + assert len(ownership_aspect.owners) == 4 + new_owner: OwnerClass = ownership_aspect.owners[0] + assert new_owner.owner == "urn:li:corpGroup:biz-data-team" + assert new_owner.source and new_owner.source.type == "SOURCE_CONTROL" + assert new_owner.type and new_owner.type == OwnershipTypeClass.BUSINESS_OWNER + + new_owner = ownership_aspect.owners[1] assert new_owner.owner == "urn:li:corpGroup:test_user" assert new_owner.source and new_owner.source.type == "SOURCE_CONTROL" assert new_owner.type and new_owner.type == OwnershipTypeClass.DATA_STEWARD - new_owner = ownership_aspect.owners[1] + new_owner = ownership_aspect.owners[2] assert new_owner.owner == "urn:li:corpuser:alice" assert new_owner.source and new_owner.source.type == "SOURCE_CONTROL" assert new_owner.type and new_owner.type == OwnershipTypeClass.BUSINESS_OWNER - new_owner = ownership_aspect.owners[2] + 
new_owner = ownership_aspect.owners[3] assert new_owner.owner == "urn:li:corpuser:bob" assert new_owner.source and new_owner.source.type == "SOURCE_CONTROL" assert new_owner.type == OwnershipTypeClass.CUSTOM diff --git a/metadata-ingestion/tests/unit/test_tableau_source.py b/metadata-ingestion/tests/unit/test_tableau_source.py index f5410b161ed703..1cd0557d085f19 100644 --- a/metadata-ingestion/tests/unit/test_tableau_source.py +++ b/metadata-ingestion/tests/unit/test_tableau_source.py @@ -1,8 +1,37 @@ +from typing import Any, Dict + import pytest import datahub.ingestion.source.tableau_constant as c from datahub.ingestion.source.tableau import TableauSiteSource -from datahub.ingestion.source.tableau_common import get_filter_pages, make_filter +from datahub.ingestion.source.tableau_common import ( + get_filter_pages, + make_filter, + tableau_field_to_schema_field, +) +from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField + + +def test_tablea_source_handles_none_nativedatatype(): + field: Dict[str, Any] = { + "__typename": "CalculatedField", + "id": "abcd", + "name": "Test Field", + "description": None, + "isHidden": False, + "folderName": None, + "upstreamFields": [], + "upstreamColumns": [], + "role": None, + "dataType": None, + "defaultFormat": "s", + "aggregation": None, + "formula": "a/b + d", + } + schema_field: SchemaField = tableau_field_to_schema_field( + field=field, ingest_tags=False + ) + assert schema_field.nativeDataType == "UNKNOWN" def test_tableau_source_unescapes_lt(): diff --git a/metadata-io/build.gradle b/metadata-io/build.gradle index 17d9cb8cd14fee..ff29cb5fff47d2 100644 --- a/metadata-io/build.gradle +++ b/metadata-io/build.gradle @@ -130,7 +130,6 @@ test { // override, testng controlling parallelization // increasing >1 will merely run all tests extra times maxParallelForks = 1 - environment "ELASTIC_ID_HASH_ALGO", "MD5" } useTestNG() { suites 'src/test/resources/testng.xml' diff --git 
a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java index 0808c29e8ea892..3ec090a3db3a45 100644 --- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java @@ -170,16 +170,22 @@ public AspectsBatchImplBuilder mcps( mcps.stream() .map( mcp -> { - if (mcp.getChangeType().equals(ChangeType.PATCH)) { - return PatchItemImpl.PatchItemImplBuilder.build( - mcp, - auditStamp, - retrieverContext.getAspectRetriever().getEntityRegistry()); - } else { - return ChangeItemImpl.ChangeItemImplBuilder.build( - mcp, auditStamp, retrieverContext.getAspectRetriever()); + try { + if (mcp.getChangeType().equals(ChangeType.PATCH)) { + return PatchItemImpl.PatchItemImplBuilder.build( + mcp, + auditStamp, + retrieverContext.getAspectRetriever().getEntityRegistry()); + } else { + return ChangeItemImpl.ChangeItemImplBuilder.build( + mcp, auditStamp, retrieverContext.getAspectRetriever()); + } + } catch (IllegalArgumentException e) { + log.error("Invalid proposal, skipping and proceeding with batch: " + mcp, e); + return null; } }) + .filter(Objects::nonNull) .collect(Collectors.toList())); return this; } diff --git a/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImplTest.java b/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImplTest.java index d2e7243d045604..31dd868b4cb4a3 100644 --- a/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImplTest.java +++ b/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImplTest.java @@ -1,22 +1,26 @@ package com.linkedin.metadata.entity.ebean.batch; 
-import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; -import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; -import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTIES_ASPECT_NAME; +import static com.linkedin.metadata.Constants.*; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import static org.testng.Assert.assertEquals; +import com.google.common.collect.ImmutableList; +import com.linkedin.common.FabricType; import com.linkedin.common.Status; +import com.linkedin.common.urn.DataPlatformUrn; +import com.linkedin.common.urn.DatasetUrn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.ByteString; import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; +import com.linkedin.dataset.DatasetProperties; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.patch.GenericJsonPatch; import com.linkedin.metadata.aspect.patch.PatchOperationType; +import com.linkedin.metadata.aspect.patch.builder.DatasetPropertiesPatchBuilder; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import com.linkedin.metadata.aspect.plugins.hooks.MutationHook; import com.linkedin.metadata.entity.SearchRetriever; @@ -297,6 +301,38 @@ public void toUpsertBatchItemsProposedItemTest() { "Mutation to status aspect"); } + @Test + public void singleInvalidDoesntBreakBatch() { + MetadataChangeProposal proposal1 = + new DatasetPropertiesPatchBuilder() + .urn(new DatasetUrn(new DataPlatformUrn("platform"), "name", FabricType.PROD)) + .setDescription("something") + .setName("name") + .addCustomProperty("prop1", "propVal1") + .addCustomProperty("prop2", "propVal2") + .build(); + MetadataChangeProposal proposal2 = + new MetadataChangeProposal() + .setEntityType(DATASET_ENTITY_NAME) + 
.setAspectName(DATASET_PROPERTIES_ASPECT_NAME) + .setAspect(GenericRecordUtils.serializeAspect(new DatasetProperties())) + .setChangeType(ChangeType.UPSERT); + + AspectsBatchImpl testBatch = + AspectsBatchImpl.builder() + .mcps( + ImmutableList.of(proposal1, proposal2), + AuditStampUtils.createDefaultAuditStamp(), + retrieverContext) + .retrieverContext(retrieverContext) + .build(); + + assertEquals( + testBatch.toUpsertBatchItems(Map.of()).getSecond().size(), + 1, + "Expected 1 valid mcp to be passed through."); + } + /** Converts unsupported to status aspect */ @Getter @Setter diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java index 5b0fb554a4f48d..e1532ea4e26c06 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java @@ -64,6 +64,7 @@ public class ElasticSearchGraphService implements GraphService, ElasticSearchInd private final ESGraphWriteDAO _graphWriteDAO; private final ESGraphQueryDAO _graphReadDAO; private final ESIndexBuilder _indexBuilder; + private final String idHashAlgo; public static final String INDEX_NAME = "graph_service_v1"; private static final Map EMPTY_HASH = new HashMap<>(); @@ -125,7 +126,7 @@ public LineageRegistry getLineageRegistry() { @Override public void addEdge(@Nonnull final Edge edge) { - String docId = edge.toDocId(); + String docId = edge.toDocId(idHashAlgo); String edgeDocument = toDocument(edge); _graphWriteDAO.upsertDocument(docId, edgeDocument); } @@ -137,7 +138,7 @@ public void upsertEdge(@Nonnull final Edge edge) { @Override public void removeEdge(@Nonnull final Edge edge) { - String docId = edge.toDocId(); + String docId = edge.toDocId(idHashAlgo); _graphWriteDAO.deleteDocument(docId); } diff --git 
a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java index dff0a99a142b73..2ab9e17f281637 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java @@ -80,6 +80,7 @@ public class UpdateIndicesService implements SearchIndicesService { private final SystemMetadataService _systemMetadataService; private final SearchDocumentTransformer _searchDocumentTransformer; private final EntityIndexBuilders _entityIndexBuilders; + @Nonnull private final String idHashAlgo; @Value("${featureFlags.graphServiceDiffModeEnabled:true}") private boolean _graphDiffMode; @@ -117,13 +118,15 @@ public UpdateIndicesService( TimeseriesAspectService timeseriesAspectService, SystemMetadataService systemMetadataService, SearchDocumentTransformer searchDocumentTransformer, - EntityIndexBuilders entityIndexBuilders) { + EntityIndexBuilders entityIndexBuilders, + @Nonnull String idHashAlgo) { _graphService = graphService; _entitySearchService = entitySearchService; _timeseriesAspectService = timeseriesAspectService; _systemMetadataService = systemMetadataService; _searchDocumentTransformer = searchDocumentTransformer; _entityIndexBuilders = entityIndexBuilders; + this.idHashAlgo = idHashAlgo; } @Override @@ -601,7 +604,9 @@ private void updateTimeseriesFields( SystemMetadata systemMetadata) { Map documents; try { - documents = TimeseriesAspectTransformer.transform(urn, aspect, aspectSpec, systemMetadata); + documents = + TimeseriesAspectTransformer.transform( + urn, aspect, aspectSpec, systemMetadata, idHashAlgo); } catch (JsonProcessingException e) { log.error("Failed to generate timeseries document from aspect: {}", e.toString()); return; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ESSystemMetadataDAO.java 
b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ESSystemMetadataDAO.java index cf1674ac004809..a5c2fb04b5ce39 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ESSystemMetadataDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ESSystemMetadataDAO.java @@ -54,10 +54,7 @@ public Optional getTaskStatus(@Nonnull String nodeId, long task try { return client.tasks().get(taskRequest, RequestOptions.DEFAULT); } catch (IOException e) { - log.error( - String.format( - "ERROR: Failed to get task status for %s:%d. See stacktrace for a more detailed error:", - nodeId, taskId)); + log.error("ERROR: Failed to get task status: ", e); e.printStackTrace(); } return Optional.empty(); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java index cdfc4e985293f6..fe79ba75cb1d14 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java @@ -52,6 +52,7 @@ public class ElasticSearchSystemMetadataService private final IndexConvention _indexConvention; private final ESSystemMetadataDAO _esDAO; private final ESIndexBuilder _indexBuilder; + @Nonnull private final String elasticIdHashAlgo; private static final String DOC_DELIMETER = "--"; public static final String INDEX_NAME = "system_metadata_service_v1"; @@ -86,10 +87,9 @@ private String toDocument(SystemMetadata systemMetadata, String urn, String aspe private String toDocId(@Nonnull final String urn, @Nonnull final String aspect) { String rawDocId = urn + DOC_DELIMETER + aspect; - String hashAlgo = System.getenv("ELASTIC_ID_HASH_ALGO"); try { byte[] bytesOfRawDocID = rawDocId.getBytes(StandardCharsets.UTF_8); - MessageDigest md = 
MessageDigest.getInstance(hashAlgo); + MessageDigest md = MessageDigest.getInstance(elasticIdHashAlgo); byte[] thedigest = md.digest(bytesOfRawDocID); return Base64.getEncoder().encodeToString(thedigest); } catch (NoSuchAlgorithmException e) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/transformer/TimeseriesAspectTransformer.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/transformer/TimeseriesAspectTransformer.java index cf0a3f1466d254..c353e601a31b70 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/transformer/TimeseriesAspectTransformer.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/transformer/TimeseriesAspectTransformer.java @@ -54,7 +54,8 @@ public static Map transform( @Nonnull final Urn urn, @Nonnull final RecordTemplate timeseriesAspect, @Nonnull final AspectSpec aspectSpec, - @Nullable final SystemMetadata systemMetadata) + @Nullable final SystemMetadata systemMetadata, + @Nonnull final String idHashAlgo) throws JsonProcessingException { ObjectNode commonDocument = getCommonDocument(urn, timeseriesAspect, systemMetadata); Map finalDocuments = new HashMap<>(); @@ -74,7 +75,7 @@ public static Map transform( final Map> timeseriesFieldValueMap = FieldExtractor.extractFields(timeseriesAspect, aspectSpec.getTimeseriesFieldSpecs()); timeseriesFieldValueMap.forEach((k, v) -> setTimeseriesField(document, k, v)); - finalDocuments.put(getDocId(document, null), document); + finalDocuments.put(getDocId(document, null, idHashAlgo), document); // Create new rows for the member collection fields. 
final Map> timeseriesFieldCollectionValueMap = @@ -83,7 +84,7 @@ public static Map transform( timeseriesFieldCollectionValueMap.forEach( (key, values) -> finalDocuments.putAll( - getTimeseriesFieldCollectionDocuments(key, values, commonDocument))); + getTimeseriesFieldCollectionDocuments(key, values, commonDocument, idHashAlgo))); return finalDocuments; } @@ -216,12 +217,13 @@ private static void setTimeseriesField( private static Map getTimeseriesFieldCollectionDocuments( final TimeseriesFieldCollectionSpec fieldSpec, final List values, - final ObjectNode commonDocument) { + final ObjectNode commonDocument, + @Nonnull final String idHashAlgo) { return values.stream() .map(value -> getTimeseriesFieldCollectionDocument(fieldSpec, value, commonDocument)) .collect( Collectors.toMap( - keyDocPair -> getDocId(keyDocPair.getSecond(), keyDocPair.getFirst()), + keyDocPair -> getDocId(keyDocPair.getSecond(), keyDocPair.getFirst(), idHashAlgo), Pair::getSecond)); } @@ -257,9 +259,9 @@ private static Pair getTimeseriesFieldCollectionDocument( finalDocument); } - private static String getDocId(@Nonnull JsonNode document, String collectionId) + private static String getDocId( + @Nonnull JsonNode document, String collectionId, @Nonnull String idHashAlgo) throws IllegalArgumentException { - String hashAlgo = System.getenv("ELASTIC_ID_HASH_ALGO"); String docId = document.get(MappingsBuilder.TIMESTAMP_MILLIS_FIELD).toString(); JsonNode eventGranularity = document.get(MappingsBuilder.EVENT_GRANULARITY); if (eventGranularity != null) { @@ -278,9 +280,9 @@ private static String getDocId(@Nonnull JsonNode document, String collectionId) docId += partitionSpec.toString(); } - if (hashAlgo.equalsIgnoreCase("SHA-256")) { + if (idHashAlgo.equalsIgnoreCase("SHA-256")) { return DigestUtils.sha256Hex(docId); - } else if (hashAlgo.equalsIgnoreCase("MD5")) { + } else if (idHashAlgo.equalsIgnoreCase("MD5")) { return DigestUtils.md5Hex(docId); } throw new IllegalArgumentException("Hash function 
not handled !"); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java index 06f1369ff0670c..d1a51b1d69b2c3 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java @@ -62,7 +62,7 @@ public abstract class SearchGraphServiceTestBase extends GraphServiceTestBase { @Nonnull protected abstract ESIndexBuilder getIndexBuilder(); - private final IndexConvention _indexConvention = IndexConventionImpl.NO_PREFIX; + private final IndexConvention _indexConvention = IndexConventionImpl.noPrefix("MD5"); private final String _indexName = _indexConvention.getIndexName(INDEX_NAME); private ElasticSearchGraphService _client; @@ -108,7 +108,8 @@ private ElasticSearchGraphService buildService(boolean enableMultiPathSearch) { _indexConvention, writeDAO, readDAO, - getIndexBuilder()); + getIndexBuilder(), + "MD5"); } @Override diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java index a9d84ae1f3aea1..99e4923885a41d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java @@ -122,7 +122,7 @@ public void setup() throws RemoteInvocationException, URISyntaxException { operationContext = TestOperationContexts.systemContextNoSearchAuthorization( new SnapshotEntityRegistry(new Snapshot()), - new IndexConventionImpl("lineage_search_service_test")) + new IndexConventionImpl("lineage_search_service_test", "MD5")) .asSession(RequestContext.TEST, Authorizer.EMPTY, TestOperationContexts.TEST_USER_AUTH); settingsBuilder = new SettingsBuilder(null); 
elasticSearchService = buildEntitySearchService(); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java index 445b71b2eaff62..5e30e01a8ea690 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java @@ -79,7 +79,7 @@ public void setup() throws RemoteInvocationException, URISyntaxException { operationContext = TestOperationContexts.systemContextNoSearchAuthorization( new SnapshotEntityRegistry(new Snapshot()), - new IndexConventionImpl("search_service_test")) + new IndexConventionImpl("search_service_test", "MD5")) .asSession(RequestContext.TEST, Authorizer.EMPTY, TestOperationContexts.TEST_USER_AUTH); settingsBuilder = new SettingsBuilder(null); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java index ab5e90f77c21aa..282a3d8e3ea6ae 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java @@ -62,7 +62,8 @@ public abstract class TestEntityTestBase extends AbstractTestNGSpringContextTest public void setup() { opContext = TestOperationContexts.systemContextNoSearchAuthorization( - new SnapshotEntityRegistry(new Snapshot()), new IndexConventionImpl("es_service_test")); + new SnapshotEntityRegistry(new Snapshot()), + new IndexConventionImpl("es_service_test", "MD5")); settingsBuilder = new SettingsBuilder(null); elasticSearchService = buildService(); elasticSearchService.reindexAll(Collections.emptySet()); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java index 
a0288d019644bd..8044515e3dc6a7 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java @@ -45,7 +45,7 @@ public void setup() throws RemoteInvocationException, URISyntaxException { mockClient = mock(RestHighLevelClient.class); opContext = TestOperationContexts.systemContextNoSearchAuthorization( - new IndexConventionImpl("es_browse_dao_test")); + new IndexConventionImpl("es_browse_dao_test", "MD5")); browseDAO = new ESBrowseDAO(mockClient, searchConfiguration, customSearchConfiguration); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/SystemMetadataServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/SystemMetadataServiceTestBase.java index d843191bed7413..1b9d8c57b4cad3 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/SystemMetadataServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/SystemMetadataServiceTestBase.java @@ -32,7 +32,7 @@ public abstract class SystemMetadataServiceTestBase extends AbstractTestNGSpring protected abstract ESIndexBuilder getIndexBuilder(); private final IndexConvention _indexConvention = - new IndexConventionImpl("es_system_metadata_service_test"); + new IndexConventionImpl("es_system_metadata_service_test", "MD5"); private ElasticSearchSystemMetadataService _client; @@ -54,7 +54,7 @@ private ElasticSearchSystemMetadataService buildService() { ESSystemMetadataDAO dao = new ESSystemMetadataDAO(getSearchClient(), _indexConvention, getBulkProcessor(), 1); return new ElasticSearchSystemMetadataService( - getBulkProcessor(), _indexConvention, dao, getIndexBuilder()); + getBulkProcessor(), _indexConvention, dao, getIndexBuilder(), "MD5"); } @Test diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java 
b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java index 10c6f09cb8f8d6..414183c8882f9c 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java @@ -126,7 +126,7 @@ public void setup() throws RemoteInvocationException, URISyntaxException { opContext = TestOperationContexts.systemContextNoSearchAuthorization( - entityRegistry, new IndexConventionImpl("es_timeseries_aspect_service_test")); + entityRegistry, new IndexConventionImpl("es_timeseries_aspect_service_test", "MD5")); elasticSearchTimeseriesAspectService = buildService(); elasticSearchTimeseriesAspectService.reindexAll(Collections.emptySet()); @@ -152,7 +152,7 @@ private ElasticSearchTimeseriesAspectService buildService() { private void upsertDocument(TestEntityProfile dp, Urn urn) throws JsonProcessingException { Map documents = - TimeseriesAspectTransformer.transform(urn, dp, aspectSpec, null); + TimeseriesAspectTransformer.transform(urn, dp, aspectSpec, null, "MD5"); assertEquals(documents.size(), 3); documents.forEach( (key, value) -> diff --git a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java index 28a4a2b00cd6f1..6a95d16c254370 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java @@ -86,12 +86,12 @@ protected String longTailIndexPrefix() { @Bean(name = "sampleDataIndexConvention") protected IndexConvention indexConvention(@Qualifier("sampleDataPrefix") String prefix) { - return new IndexConventionImpl(prefix); + return new IndexConventionImpl(prefix, "MD5"); } 
@Bean(name = "longTailIndexConvention") protected IndexConvention longTailIndexConvention(@Qualifier("longTailPrefix") String prefix) { - return new IndexConventionImpl(prefix); + return new IndexConventionImpl(prefix, "MD5"); } @Bean(name = "sampleDataFixtureName") diff --git a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java index e783c011de6d0e..33e04af83c0a3a 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java @@ -71,7 +71,7 @@ protected String indexPrefix() { @Bean(name = "searchLineageIndexConvention") protected IndexConvention indexConvention(@Qualifier("searchLineagePrefix") String prefix) { - return new IndexConventionImpl(prefix); + return new IndexConventionImpl(prefix, "MD5"); } @Bean(name = "searchLineageFixtureName") @@ -173,7 +173,8 @@ protected ElasticSearchGraphService graphService( new ESGraphWriteDAO(indexConvention, bulkProcessor, 1), new ESGraphQueryDAO( searchClient, lineageRegistry, indexConvention, getGraphQueryConfiguration()), - indexBuilder); + indexBuilder, + indexConvention.getIdHashAlgo()); graphService.reindexAll(Collections.emptySet()); return graphService; } diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/MetadataChangeLogHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/MetadataChangeLogHook.java index 06a184c9f89f9c..876df4279b7b8a 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/MetadataChangeLogHook.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/MetadataChangeLogHook.java @@ -30,9 +30,7 @@ default MetadataChangeLogHook init(@Nonnull OperationContext 
systemOperationCont * Return whether the hook is enabled or not. If not enabled, the below invoke method is not * triggered */ - default boolean isEnabled() { - return true; - } + boolean isEnabled(); /** Invoke the hook when a MetadataChangeLog is received */ void invoke(@Nonnull MetadataChangeLog log) throws Exception; diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java index 411fe02260bb1b..4cd59992eb2f00 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java @@ -125,7 +125,8 @@ public void setupTest() { mockTimeseriesAspectService, mockSystemMetadataService, searchDocumentTransformer, - mockEntityIndexBuilders); + mockEntityIndexBuilders, + "MD5"); OperationContext systemOperationContext = TestOperationContexts.systemContextNoSearchAuthorization(); @@ -235,7 +236,8 @@ public void testInputFieldsEdgesAreAdded() throws Exception { mockTimeseriesAspectService, mockSystemMetadataService, searchDocumentTransformer, - mockEntityIndexBuilders); + mockEntityIndexBuilders, + "MD5"); updateIndicesHook = new UpdateIndicesHook(updateIndicesService, true, false); updateIndicesHook.init( diff --git a/metadata-jobs/pe-consumer/src/main/java/com/datahub/event/hook/PlatformEventHook.java b/metadata-jobs/pe-consumer/src/main/java/com/datahub/event/hook/PlatformEventHook.java index 37241861f2e5e6..7fcc2a07b950bf 100644 --- a/metadata-jobs/pe-consumer/src/main/java/com/datahub/event/hook/PlatformEventHook.java +++ b/metadata-jobs/pe-consumer/src/main/java/com/datahub/event/hook/PlatformEventHook.java @@ -20,9 +20,7 @@ default void init() {} * Return whether the hook is enabled or not. 
If not enabled, the below invoke method is not * triggered */ - default boolean isEnabled() { - return true; - } + boolean isEnabled(); /** Invoke the hook when a PlatformEvent is received */ void invoke(@Nonnull OperationContext opContext, @Nonnull PlatformEvent event); diff --git a/metadata-models/src/main/pegasus/com/linkedin/domain/DomainProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/domain/DomainProperties.pdl index eb307b726855db..2d93d9399bf722 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/domain/DomainProperties.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/domain/DomainProperties.pdl @@ -26,6 +26,10 @@ record DomainProperties includes CustomProperties { /** * Description of the Domain */ + @Searchable = { + "fieldType": "TEXT", + "hasValuesFieldName": "hasDescription" + } description: optional string /** diff --git a/metadata-models/src/main/pegasus/com/linkedin/settings/global/GlobalSettingsInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/settings/global/GlobalSettingsInfo.pdl index 8d4121b767dc38..6c6f4d0036ce03 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/settings/global/GlobalSettingsInfo.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/settings/global/GlobalSettingsInfo.pdl @@ -12,16 +12,17 @@ record GlobalSettingsInfo { * SSO integrations between DataHub and identity providers */ sso: optional SsoSettings + /** * Settings related to the Views Feature */ views: optional GlobalViewsSettings + /** * Settings related to the documentation propagation feature */ - docPropagation: DocPropagationFeatureSettings = { + docPropagation: optional DocPropagationFeatureSettings = { "enabled": true "columnPropagationEnabled": true } - } \ No newline at end of file diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/SearchContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/SearchContext.java index 
c067e91c3524cf..5ad7bdc14820c3 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/SearchContext.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/SearchContext.java @@ -21,7 +21,7 @@ public class SearchContext implements ContextInterface { public static SearchContext EMPTY = - SearchContext.builder().indexConvention(IndexConventionImpl.NO_PREFIX).build(); + SearchContext.builder().indexConvention(IndexConventionImpl.noPrefix("")).build(); public static SearchContext withFlagDefaults( @Nonnull SearchContext searchContext, diff --git a/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java b/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java index e54c040fe13b58..76f58fb4751085 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java @@ -191,7 +191,7 @@ public static OperationContext systemContext( IndexConvention indexConvention = Optional.ofNullable(indexConventionSupplier) .map(Supplier::get) - .orElse(IndexConventionImpl.NO_PREFIX); + .orElse(IndexConventionImpl.noPrefix("MD5")); ServicesRegistryContext servicesRegistryContext = Optional.ofNullable(servicesRegistrySupplier).orElse(() -> null).get(); diff --git a/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/SearchContextTest.java b/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/SearchContextTest.java index 4858bb342258a5..2e0585cc82a4fd 100644 --- a/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/SearchContextTest.java +++ b/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/SearchContextTest.java @@ -12,26 +12,26 @@ public class SearchContextTest 
{ @Test public void searchContextId() { SearchContext testNoFlags = - SearchContext.builder().indexConvention(IndexConventionImpl.NO_PREFIX).build(); + SearchContext.builder().indexConvention(IndexConventionImpl.noPrefix("MD5")).build(); assertEquals( testNoFlags.getCacheKeyComponent(), SearchContext.builder() - .indexConvention(IndexConventionImpl.NO_PREFIX) + .indexConvention(IndexConventionImpl.noPrefix("MD5")) .build() .getCacheKeyComponent(), "Expected consistent context ids across instances"); SearchContext testWithFlags = SearchContext.builder() - .indexConvention(IndexConventionImpl.NO_PREFIX) + .indexConvention(IndexConventionImpl.noPrefix("MD5")) .searchFlags(new SearchFlags().setFulltext(true)) .build(); assertEquals( testWithFlags.getCacheKeyComponent(), SearchContext.builder() - .indexConvention(IndexConventionImpl.NO_PREFIX) + .indexConvention(IndexConventionImpl.noPrefix("MD5")) .searchFlags(new SearchFlags().setFulltext(true)) .build() .getCacheKeyComponent(), @@ -44,7 +44,7 @@ public void searchContextId() { assertNotEquals( testWithFlags.getCacheKeyComponent(), SearchContext.builder() - .indexConvention(IndexConventionImpl.NO_PREFIX) + .indexConvention(IndexConventionImpl.noPrefix("MD5")) .searchFlags(new SearchFlags().setFulltext(true).setIncludeRestricted(true)) .build() .getCacheKeyComponent(), @@ -53,7 +53,7 @@ public void searchContextId() { assertNotEquals( testNoFlags.getCacheKeyComponent(), SearchContext.builder() - .indexConvention(new IndexConventionImpl("Some Prefix")) + .indexConvention(new IndexConventionImpl("Some Prefix", "MD5")) .searchFlags(null) .build() .getCacheKeyComponent(), @@ -61,7 +61,7 @@ public void searchContextId() { assertNotEquals( SearchContext.builder() - .indexConvention(IndexConventionImpl.NO_PREFIX) + .indexConvention(IndexConventionImpl.noPrefix("MD5")) .searchFlags( new SearchFlags() .setFulltext(false) @@ -70,7 +70,7 @@ public void searchContextId() { .build() .getCacheKeyComponent(), SearchContext.builder() 
- .indexConvention(IndexConventionImpl.NO_PREFIX) + .indexConvention(IndexConventionImpl.noPrefix("MD5")) .searchFlags(new SearchFlags().setFulltext(true).setIncludeRestricted(true)) .build() .getCacheKeyComponent(), @@ -80,7 +80,7 @@ public void searchContextId() { @Test public void testImmutableSearchFlags() { SearchContext initial = - SearchContext.builder().indexConvention(IndexConventionImpl.NO_PREFIX).build(); + SearchContext.builder().indexConvention(IndexConventionImpl.noPrefix("MD5")).build(); assertEquals(initial.getSearchFlags(), new SearchFlags().setSkipCache(false)); SearchContext mutated = initial.withFlagDefaults(flags -> flags.setSkipCache(true)); diff --git a/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java b/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java index 71eaca71a3641a..de2582af00a932 100644 --- a/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java +++ b/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java @@ -123,7 +123,7 @@ CompletableFuture> generateSessionTokenForUser( try { bodyJson = mapper.readTree(jsonStr); } catch (JsonProcessingException e) { - log.error("Failed to parse json while attempting to generate session token {}", jsonStr, e); + log.error("Failed to parse json while attempting to generate session token ", e); return CompletableFuture.completedFuture(new ResponseEntity<>(HttpStatus.BAD_REQUEST)); } if (bodyJson == null) { @@ -238,7 +238,7 @@ CompletableFuture> signUp(final HttpEntity httpEn try { Urn inviteTokenUrn = _inviteTokenService.getInviteTokenUrn(inviteTokenString); if (!_inviteTokenService.isInviteTokenValid(systemOperationContext, inviteTokenUrn)) { - log.error("Invalid invite token {}", inviteTokenString); + log.error("Invalid invite token"); return new 
ResponseEntity<>(HttpStatus.BAD_REQUEST); } @@ -386,7 +386,7 @@ CompletableFuture> track(final HttpEntity httpEnt try { bodyJson = mapper.readTree(jsonStr); } catch (JsonProcessingException e) { - log.error("Failed to parse json while attempting to track analytics event {}", jsonStr); + log.error("Failed to parse json while attempting to track analytics event", e); return CompletableFuture.completedFuture(new ResponseEntity<>(HttpStatus.BAD_REQUEST)); } if (bodyJson == null) { diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/ElasticSearchConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/ElasticSearchConfiguration.java index 130620a9ab918c..7d68e18940401e 100644 --- a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/ElasticSearchConfiguration.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/ElasticSearchConfiguration.java @@ -8,4 +8,5 @@ public class ElasticSearchConfiguration { private BuildIndicesConfiguration buildIndices; public String implementation; private SearchConfiguration search; + private String idHashAlgo; } diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index 5b3673ddca52c6..3f1c6dd1a3d7d8 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -335,6 +335,11 @@ systemUpdate: batchSize: ${BOOTSTRAP_SYSTEM_UPDATE_DATA_JOB_NODE_CLL_BATCH_SIZE:1000} delayMs: ${BOOTSTRAP_SYSTEM_UPDATE_DATA_JOB_NODE_CLL_DELAY_MS:30000} limit: ${BOOTSTRAP_SYSTEM_UPDATE_DATA_JOB_NODE_CLL_LIMIT:0} + domainDescription: + enabled: ${BOOTSTRAP_SYSTEM_UPDATE_DOMAIN_DESCRIPTION_ENABLED:true} + batchSize: ${BOOTSTRAP_SYSTEM_UPDATE_DOMAIN_DESCRIPTION_BATCH_SIZE:1000} + delayMs: 
${BOOTSTRAP_SYSTEM_UPDATE_DOMAIN_DESCRIPTION_DELAY_MS:30000} + limit: ${BOOTSTRAP_SYSTEM_UPDATE_DOMAIN_DESCRIPTION_CLL_LIMIT:0} browsePathsV2: enabled: ${BOOTSTRAP_SYSTEM_UPDATE_BROWSE_PATHS_V2_ENABLED:true} batchSize: ${BOOTSTRAP_SYSTEM_UPDATE_BROWSE_PATHS_V2_BATCH_SIZE:5000} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchGraphServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchGraphServiceFactory.java index eb56e8d42c158e..55eb931625fecc 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchGraphServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchGraphServiceFactory.java @@ -11,6 +11,7 @@ import javax.annotation.Nonnull; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; @@ -30,7 +31,8 @@ public class ElasticSearchGraphServiceFactory { @Bean(name = "elasticSearchGraphService") @Nonnull - protected ElasticSearchGraphService getInstance() { + protected ElasticSearchGraphService getInstance( + @Value("${elasticsearch.idHashAlgo}") final String idHashAlgo) { LineageRegistry lineageRegistry = new LineageRegistry(entityRegistry); return new ElasticSearchGraphService( lineageRegistry, @@ -45,6 +47,7 @@ protected ElasticSearchGraphService getInstance() { lineageRegistry, components.getIndexConvention(), configurationProvider.getElasticSearch().getSearch().getGraph()), - components.getIndexBuilder()); + components.getIndexBuilder(), + idHashAlgo); } } diff --git 
a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchSystemMetadataServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchSystemMetadataServiceFactory.java index d560fba399f340..fb48d64ce7ba9e 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchSystemMetadataServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchSystemMetadataServiceFactory.java @@ -6,6 +6,7 @@ import javax.annotation.Nonnull; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; @@ -19,7 +20,8 @@ public class ElasticSearchSystemMetadataServiceFactory { @Bean(name = "elasticSearchSystemMetadataService") @Nonnull - protected ElasticSearchSystemMetadataService getInstance() { + protected ElasticSearchSystemMetadataService getInstance( + @Value("${elasticsearch.idHashAlgo}") final String elasticIdHashAlgo) { return new ElasticSearchSystemMetadataService( components.getBulkProcessor(), components.getIndexConvention(), @@ -28,6 +30,7 @@ protected ElasticSearchSystemMetadataService getInstance() { components.getIndexConvention(), components.getBulkProcessor(), components.getNumRetries()), - components.getIndexBuilder()); + components.getIndexBuilder(), + elasticIdHashAlgo); } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/IndexConventionFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/IndexConventionFactory.java index 5b76a3f2cb833f..2288c8d4ecd50d 100644 --- 
a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/IndexConventionFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/IndexConventionFactory.java @@ -19,7 +19,8 @@ public class IndexConventionFactory { private String indexPrefix; @Bean(name = INDEX_CONVENTION_BEAN) - protected IndexConvention createInstance() { - return new IndexConventionImpl(indexPrefix); + protected IndexConvention createInstance( + @Value("${elasticsearch.idHashAlgo}") final String isHashAlgo) { + return new IndexConventionImpl(indexPrefix, isHashAlgo); } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/update/indices/UpdateIndicesServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/update/indices/UpdateIndicesServiceFactory.java index fad9d0eaf3b45c..38a344f8be8e92 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/update/indices/UpdateIndicesServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/update/indices/UpdateIndicesServiceFactory.java @@ -9,6 +9,7 @@ import com.linkedin.metadata.service.UpdateIndicesService; import com.linkedin.metadata.systemmetadata.SystemMetadataService; import com.linkedin.metadata.timeseries.TimeseriesAspectService; +import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; @@ -30,7 +31,8 @@ public UpdateIndicesService searchIndicesServiceNonGMS( TimeseriesAspectService timeseriesAspectService, SystemMetadataService systemMetadataService, SearchDocumentTransformer searchDocumentTransformer, - EntityIndexBuilders entityIndexBuilders) { + EntityIndexBuilders entityIndexBuilders, + @Value("${elasticsearch.idHashAlgo}") final String idHashAlgo) { return new 
UpdateIndicesService( graphService, @@ -38,7 +40,8 @@ public UpdateIndicesService searchIndicesServiceNonGMS( timeseriesAspectService, systemMetadataService, searchDocumentTransformer, - entityIndexBuilders); + entityIndexBuilders, + idHashAlgo); } @Bean @@ -50,7 +53,8 @@ public UpdateIndicesService searchIndicesServiceGMS( final SystemMetadataService systemMetadataService, final SearchDocumentTransformer searchDocumentTransformer, final EntityIndexBuilders entityIndexBuilders, - final EntityService entityService) { + final EntityService entityService, + @Value("${elasticsearch.idHashAlgo}") final String idHashAlgo) { UpdateIndicesService updateIndicesService = new UpdateIndicesService( @@ -59,7 +63,8 @@ public UpdateIndicesService searchIndicesServiceGMS( timeseriesAspectService, systemMetadataService, searchDocumentTransformer, - entityIndexBuilders); + entityIndexBuilders, + idHashAlgo); entityService.setUpdateIndicesService(updateIndicesService); diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java index 518dfecd576808..1b003fec82e8b8 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java @@ -2,25 +2,20 @@ import static com.datahub.authorization.AuthUtil.isAPIAuthorized; import static com.datahub.authorization.AuthUtil.isAPIAuthorizedEntityUrns; -import static com.linkedin.metadata.Constants.*; import static com.linkedin.metadata.authorization.ApiOperation.UPDATE; import static com.linkedin.metadata.timeseries.elastic.UsageServiceUtil.USAGE_STATS_ASPECT_NAME; import static com.linkedin.metadata.timeseries.elastic.UsageServiceUtil.USAGE_STATS_ENTITY_NAME; import com.codahale.metrics.MetricRegistry; -import 
com.codahale.metrics.Timer; import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; import com.datahub.authorization.EntitySpec; import com.datahub.plugins.auth.authorization.Authorizer; import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.core.StreamReadConstraints; import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; import com.linkedin.common.WindowDuration; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; -import com.linkedin.data.template.StringArray; import com.linkedin.dataset.DatasetFieldUsageCounts; import com.linkedin.dataset.DatasetFieldUsageCountsArray; import com.linkedin.dataset.DatasetUsageStatistics; @@ -29,17 +24,10 @@ import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.metadata.query.filter.Condition; -import com.linkedin.metadata.query.filter.ConjunctiveCriterion; -import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; -import com.linkedin.metadata.query.filter.CriterionArray; -import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.restli.RestliUtil; import com.linkedin.metadata.timeseries.TimeseriesAspectService; import com.linkedin.metadata.timeseries.elastic.UsageServiceUtil; import com.linkedin.metadata.timeseries.transformer.TimeseriesAspectTransformer; -import com.linkedin.metadata.utils.metrics.MetricUtils; import com.linkedin.parseq.Task; import com.linkedin.restli.common.HttpStatus; import com.linkedin.restli.server.RestLiServiceException; @@ -47,35 +35,20 @@ import com.linkedin.restli.server.annotations.ActionParam; import com.linkedin.restli.server.annotations.RestLiSimpleResource; import com.linkedin.restli.server.resources.SimpleResourceTemplate; 
-import com.linkedin.timeseries.AggregationSpec; -import com.linkedin.timeseries.AggregationType; -import com.linkedin.timeseries.CalendarInterval; -import com.linkedin.timeseries.GenericTable; -import com.linkedin.timeseries.GroupingBucket; -import com.linkedin.timeseries.GroupingBucketType; import com.linkedin.timeseries.TimeWindowSize; import com.linkedin.usage.FieldUsageCounts; -import com.linkedin.usage.FieldUsageCountsArray; import com.linkedin.usage.UsageAggregation; -import com.linkedin.usage.UsageAggregationArray; import com.linkedin.usage.UsageAggregationMetrics; import com.linkedin.usage.UsageQueryResult; -import com.linkedin.usage.UsageQueryResultAggregations; import com.linkedin.usage.UsageTimeRange; import com.linkedin.usage.UserUsageCounts; -import com.linkedin.usage.UserUsageCountsArray; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.RequestContext; import io.opentelemetry.extension.annotations.WithSpan; -import java.net.URISyntaxException; -import java.time.Instant; -import java.util.ArrayList; import java.util.Arrays; -import java.util.List; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; -import java.util.concurrent.TimeUnit; import javax.annotation.Nonnull; import javax.inject.Inject; import javax.inject.Named; @@ -255,7 +228,8 @@ private void ingest(@Nonnull OperationContext opContext, @Nonnull UsageAggregati try { documents = TimeseriesAspectTransformer.transform( - bucket.getResource(), datasetUsageStatistics, getUsageStatsAspectSpec(), null); + bucket.getResource(), datasetUsageStatistics, getUsageStatsAspectSpec(), null, + systemOperationContext.getSearchContext().getIndexConvention().getIdHashAlgo()); } catch (JsonProcessingException e) { log.error("Failed to generate timeseries document from aspect: {}", e.toString()); return; diff --git 
a/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/SchemaRegistryController.java b/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/SchemaRegistryController.java index d73b353f38ae78..09043c6dd5e87e 100644 --- a/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/SchemaRegistryController.java +++ b/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/SchemaRegistryController.java @@ -307,7 +307,11 @@ public ResponseEntity register( }) .orElseGet( () -> { - log.error("Couldn't find topic with name {}.", topicName); + if (topicName.matches("^[a-zA-Z0-9._-]+$")) { + log.error("Couldn't find topic with name {}.", topicName); + } else { + log.error("Couldn't find topic (Malformed topic name)"); + } return new ResponseEntity<>(HttpStatus.NOT_FOUND); }); } diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java index aed9b97411ff68..ed14dec4ed940a 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java @@ -729,11 +729,11 @@ private MetadataChangeProposal updateFormsAspect( .collect(Collectors.toList()); List completedForms = formsAspect.getCompletedForms().stream() - .filter(completedForm -> completedForm.getUrn() != deletedUrn) + .filter(completedForm -> !completedForm.getUrn().equals(deletedUrn)) .collect(Collectors.toList()); final List verifications = formsAspect.getVerifications().stream() - .filter(verification -> verification.getForm() != deletedUrn) + .filter(verification -> !verification.getForm().equals(deletedUrn)) .collect(Collectors.toList()); updatedAspect.get().setIncompleteForms(new 
FormAssociationArray(incompleteForms)); diff --git a/metadata-service/war/src/main/resources/boot/global_settings.json b/metadata-service/war/src/main/resources/boot/global_settings.json index 129783afd6df49..35145b85202a7b 100644 --- a/metadata-service/war/src/main/resources/boot/global_settings.json +++ b/metadata-service/war/src/main/resources/boot/global_settings.json @@ -1,4 +1,8 @@ { "views": { + }, + "docPropagation": { + "enabled": true, + "columnPropagationEnabled": true } } \ No newline at end of file diff --git a/metadata-service/war/src/main/resources/boot/policies.json b/metadata-service/war/src/main/resources/boot/policies.json index b684fd88e61c29..e62d0a33e7cd05 100644 --- a/metadata-service/war/src/main/resources/boot/policies.json +++ b/metadata-service/war/src/main/resources/boot/policies.json @@ -365,7 +365,8 @@ "glossaryTerm", "glossaryNode", "notebook", - "dataProduct" + "dataProduct", + "dataProcessInstance" ], "condition": "EQUALS" } diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/utils/elasticsearch/IndexConvention.java b/metadata-utils/src/main/java/com/linkedin/metadata/utils/elasticsearch/IndexConvention.java index 4a3f78fcef7bd6..87aebabf643666 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/utils/elasticsearch/IndexConvention.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/utils/elasticsearch/IndexConvention.java @@ -47,4 +47,7 @@ public interface IndexConvention { * if one cannot be extracted */ Optional> getEntityAndAspectName(String timeseriesAspectIndexName); + + @Nonnull + String getIdHashAlgo(); } diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/utils/elasticsearch/IndexConventionImpl.java b/metadata-utils/src/main/java/com/linkedin/metadata/utils/elasticsearch/IndexConventionImpl.java index 47801cd2054fa4..2c9c927cd8c347 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/utils/elasticsearch/IndexConventionImpl.java +++ 
b/metadata-utils/src/main/java/com/linkedin/metadata/utils/elasticsearch/IndexConventionImpl.java @@ -8,25 +8,30 @@ import java.util.concurrent.ConcurrentHashMap; import javax.annotation.Nonnull; import javax.annotation.Nullable; +import lombok.Getter; import org.apache.commons.lang3.StringUtils; // Default implementation of search index naming convention public class IndexConventionImpl implements IndexConvention { - public static final IndexConvention NO_PREFIX = new IndexConventionImpl(null); + public static IndexConvention noPrefix(@Nonnull String idHashAlgo) { + return new IndexConventionImpl(null, idHashAlgo); + } // Map from Entity name -> Index name private final Map indexNameMapping = new ConcurrentHashMap<>(); private final Optional _prefix; private final String _getAllEntityIndicesPattern; private final String _getAllTimeseriesIndicesPattern; + @Getter private final String idHashAlgo; private static final String ENTITY_INDEX_VERSION = "v2"; private static final String ENTITY_INDEX_SUFFIX = "index"; private static final String TIMESERIES_INDEX_VERSION = "v1"; private static final String TIMESERIES_ENTITY_INDEX_SUFFIX = "aspect"; - public IndexConventionImpl(@Nullable String prefix) { + public IndexConventionImpl(@Nullable String prefix, String idHashAlgo) { _prefix = StringUtils.isEmpty(prefix) ? 
Optional.empty() : Optional.of(prefix); + this.idHashAlgo = idHashAlgo; _getAllEntityIndicesPattern = _prefix.map(p -> p + "_").orElse("") + "*" diff --git a/metadata-utils/src/test/java/com/linkedin/metadata/utils/elasticsearch/IndexConventionImplTest.java b/metadata-utils/src/test/java/com/linkedin/metadata/utils/elasticsearch/IndexConventionImplTest.java index 8074f344cd2441..2f6c7138d3c4fb 100644 --- a/metadata-utils/src/test/java/com/linkedin/metadata/utils/elasticsearch/IndexConventionImplTest.java +++ b/metadata-utils/src/test/java/com/linkedin/metadata/utils/elasticsearch/IndexConventionImplTest.java @@ -10,7 +10,7 @@ public class IndexConventionImplTest { @Test public void testIndexConventionNoPrefix() { - IndexConvention indexConventionNoPrefix = IndexConventionImpl.NO_PREFIX; + IndexConvention indexConventionNoPrefix = IndexConventionImpl.noPrefix("MD5"); String entityName = "dataset"; String expectedIndexName = "datasetindex_v2"; assertEquals(indexConventionNoPrefix.getEntityIndexName(entityName), expectedIndexName); @@ -25,7 +25,7 @@ public void testIndexConventionNoPrefix() { @Test public void testIndexConventionPrefix() { - IndexConvention indexConventionPrefix = new IndexConventionImpl("prefix"); + IndexConvention indexConventionPrefix = new IndexConventionImpl("prefix", "MD5"); String entityName = "dataset"; String expectedIndexName = "prefix_datasetindex_v2"; assertEquals(indexConventionPrefix.getEntityIndexName(entityName), expectedIndexName); @@ -42,7 +42,7 @@ public void testIndexConventionPrefix() { @Test public void testTimeseriesIndexConventionNoPrefix() { - IndexConvention indexConventionNoPrefix = IndexConventionImpl.NO_PREFIX; + IndexConvention indexConventionNoPrefix = IndexConventionImpl.noPrefix("MD5"); String entityName = "dataset"; String aspectName = "datasetusagestatistics"; String expectedIndexName = "dataset_datasetusagestatisticsaspect_v1"; @@ -64,7 +64,7 @@ public void testTimeseriesIndexConventionNoPrefix() { @Test public void 
testTimeseriesIndexConventionPrefix() { - IndexConvention indexConventionPrefix = new IndexConventionImpl("prefix"); + IndexConvention indexConventionPrefix = new IndexConventionImpl("prefix", "MD5"); String entityName = "dataset"; String aspectName = "datasetusagestatistics"; String expectedIndexName = "prefix_dataset_datasetusagestatisticsaspect_v1"; diff --git a/settings.gradle b/settings.gradle index b850816ab5e6b7..899ca8f6f869b5 100644 --- a/settings.gradle +++ b/settings.gradle @@ -61,6 +61,7 @@ include 'metadata-integration:java:openlineage-converter' include 'metadata-integration:java:acryl-spark-lineage' include 'ingestion-scheduler' include 'metadata-ingestion-modules:airflow-plugin' +include 'metadata-ingestion-modules:gx-plugin' include 'metadata-ingestion-modules:dagster-plugin' include 'smoke-test' include 'metadata-auth:auth-api' diff --git a/smoke-test/build.gradle b/smoke-test/build.gradle index 95f3ba8ed56d64..a9e5a8942b71ec 100644 --- a/smoke-test/build.gradle +++ b/smoke-test/build.gradle @@ -44,12 +44,19 @@ task yarnInstall(type: YarnTask) { environment = ['NODE_OPTIONS': '--openssl-legacy-provider'] args = ['install', '--cwd', "${project.rootDir}/smoke-test/tests/cypress"] } + task cypressLint(type: YarnTask, dependsOn: yarnInstall) { environment = ['NODE_OPTIONS': '--openssl-legacy-provider'] // TODO: Run a full lint instead of just format. args = ['--cwd', "${project.rootDir}/smoke-test/tests/cypress", 'run', 'format'] } +task cypressLintFix(type: YarnTask, dependsOn: yarnInstall) { + environment = ['NODE_OPTIONS': '--openssl-legacy-provider'] + // TODO: Run a full lint instead of just format. 
+ args = ['--cwd', "${project.rootDir}/smoke-test/tests/cypress", 'run', 'format', '--write'] +} + task installDev(type: Exec) { inputs.file file('pyproject.toml') inputs.file file('requirements.txt') @@ -86,10 +93,7 @@ task pythonLintFix(type: Exec, dependsOn: installDev) { */ task noCypressSuite0(type: Exec, dependsOn: [installDev, ':metadata-ingestion:installDev']) { environment 'RUN_QUICKSTART', 'false' - environment 'DATAHUB_KAFKA_SCHEMA_REGISTRY_URL', 'http://localhost:8080/schema-registry/api/' - environment 'KAFKA_BROKER_CONTAINER', 'datahub-kafka-broker-1' environment 'TEST_STRATEGY', 'no_cypress_suite0' - environment "ELASTIC_ID_HASH_ALGO", "MD5" workingDir = project.projectDir commandLine 'bash', '-c', @@ -99,10 +103,7 @@ task noCypressSuite0(type: Exec, dependsOn: [installDev, ':metadata-ingestion:in task noCypressSuite1(type: Exec, dependsOn: [installDev, ':metadata-ingestion:installDev']) { environment 'RUN_QUICKSTART', 'false' - environment 'DATAHUB_KAFKA_SCHEMA_REGISTRY_URL', 'http://localhost:8080/schema-registry/api/' - environment 'KAFKA_BROKER_CONTAINER', 'datahub-kafka-broker-1' environment 'TEST_STRATEGY', 'no_cypress_suite1' - environment "ELASTIC_ID_HASH_ALGO", "MD5" workingDir = project.projectDir commandLine 'bash', '-c', @@ -112,10 +113,7 @@ task noCypressSuite1(type: Exec, dependsOn: [installDev, ':metadata-ingestion:in task cypressSuite1(type: Exec, dependsOn: [installDev, ':metadata-ingestion:installDev']) { environment 'RUN_QUICKSTART', 'false' - environment 'DATAHUB_KAFKA_SCHEMA_REGISTRY_URL', 'http://localhost:8080/schema-registry/api/' - environment 'KAFKA_BROKER_CONTAINER', 'datahub-kafka-broker-1' environment 'TEST_STRATEGY', 'cypress_suite1' - environment "ELASTIC_ID_HASH_ALGO", "MD5" workingDir = project.projectDir commandLine 'bash', '-c', @@ -125,10 +123,7 @@ task cypressSuite1(type: Exec, dependsOn: [installDev, ':metadata-ingestion:inst task cypressRest(type: Exec, dependsOn: [installDev, ':metadata-ingestion:installDev']) 
{ environment 'RUN_QUICKSTART', 'false' - environment 'DATAHUB_KAFKA_SCHEMA_REGISTRY_URL', 'http://localhost:8080/schema-registry/api/' - environment 'KAFKA_BROKER_CONTAINER', 'datahub-kafka-broker-1' environment 'TEST_STRATEGY', 'cypress_rest' - environment "ELASTIC_ID_HASH_ALGO", "MD5" workingDir = project.projectDir commandLine 'bash', '-c', @@ -141,9 +136,6 @@ task cypressRest(type: Exec, dependsOn: [installDev, ':metadata-ingestion:instal */ task cypressDev(type: Exec, dependsOn: [installDev, ':metadata-ingestion:installDev']) { environment 'RUN_QUICKSTART', 'false' - environment 'DATAHUB_KAFKA_SCHEMA_REGISTRY_URL', 'http://localhost:8080/schema-registry/api/' - environment 'KAFKA_BROKER_CONTAINER', 'datahub-kafka-broker-1' - environment "ELASTIC_ID_HASH_ALGO", "MD5" workingDir = project.projectDir commandLine 'bash', '-c', @@ -156,13 +148,18 @@ task cypressDev(type: Exec, dependsOn: [installDev, ':metadata-ingestion:install */ task cypressData(type: Exec, dependsOn: [installDev, ':metadata-ingestion:installDev']) { environment 'RUN_QUICKSTART', 'false' - environment 'DATAHUB_KAFKA_SCHEMA_REGISTRY_URL', 'http://localhost:8080/schema-registry/api/' - environment 'KAFKA_BROKER_CONTAINER', 'datahub-kafka-broker-1' environment 'RUN_UI', 'false' - environment "ELASTIC_ID_HASH_ALGO", "MD5" workingDir = project.projectDir commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + "./cypress-dev.sh" -} \ No newline at end of file +} + +task lint { + dependsOn pythonLint, cypressLint +} + +task lintFix { + dependsOn pythonLintFix +} diff --git a/smoke-test/cypress-dev.sh b/smoke-test/cypress-dev.sh index 59346b26069059..bce2d794b18691 100755 --- a/smoke-test/cypress-dev.sh +++ b/smoke-test/cypress-dev.sh @@ -10,9 +10,9 @@ fi source venv/bin/activate -export KAFKA_BROKER_CONTAINER="datahub-kafka-broker-1" -export KAFKA_BOOTSTRAP_SERVER="broker:9092" -export ELASTIC_ID_HASH_ALGO="MD5" +# set environment variables for the test +source 
./set-test-env-vars.sh + python -c 'from tests.cypress.integration_test import ingest_data; ingest_data()' cd tests/cypress diff --git a/smoke-test/run-quickstart.sh b/smoke-test/run-quickstart.sh index 1923d42eb5e939..eb0d46b3172442 100755 --- a/smoke-test/run-quickstart.sh +++ b/smoke-test/run-quickstart.sh @@ -10,17 +10,17 @@ source venv/bin/activate mkdir -p ~/.datahub/plugins/frontend/auth/ echo "test_user:test_pass" >> ~/.datahub/plugins/frontend/auth/user.props +echo "DATAHUB_VERSION = $DATAHUB_VERSION" DATAHUB_SEARCH_IMAGE="${DATAHUB_SEARCH_IMAGE:=opensearchproject/opensearch}" DATAHUB_SEARCH_TAG="${DATAHUB_SEARCH_TAG:=2.9.0}" XPACK_SECURITY_ENABLED="${XPACK_SECURITY_ENABLED:=plugins.security.disabled=true}" ELASTICSEARCH_USE_SSL="${ELASTICSEARCH_USE_SSL:=false}" USE_AWS_ELASTICSEARCH="${USE_AWS_ELASTICSEARCH:=true}" -ELASTIC_ID_HASH_ALGO="${ELASTIC_ID_HASH_ALGO:=MD5}" -echo "DATAHUB_VERSION = $DATAHUB_VERSION" DATAHUB_TELEMETRY_ENABLED=false \ DOCKER_COMPOSE_BASE="file://$( dirname "$DIR" )" \ DATAHUB_SEARCH_IMAGE="$DATAHUB_SEARCH_IMAGE" DATAHUB_SEARCH_TAG="$DATAHUB_SEARCH_TAG" \ XPACK_SECURITY_ENABLED="$XPACK_SECURITY_ENABLED" ELASTICSEARCH_USE_SSL="$ELASTICSEARCH_USE_SSL" \ USE_AWS_ELASTICSEARCH="$USE_AWS_ELASTICSEARCH" \ -datahub docker quickstart --version ${DATAHUB_VERSION} --standalone_consumers --dump-logs-on-failure --kafka-setup +DATAHUB_VERSION=${DATAHUB_VERSION} \ +docker compose --project-directory ../docker/profiles --profile quickstart-consumers up -d --quiet-pull --wait --wait-timeout 900 diff --git a/smoke-test/set-cypress-creds.sh b/smoke-test/set-cypress-creds.sh index 82fe736b0a7e18..fc6e7dd42f5dea 100644 --- a/smoke-test/set-cypress-creds.sh +++ b/smoke-test/set-cypress-creds.sh @@ -2,4 +2,4 @@ export CYPRESS_ADMIN_USERNAME=${ADMIN_USERNAME:-datahub} export CYPRESS_ADMIN_PASSWORD=${ADMIN_PASSWORD:-datahub} -export CYPRESS_ADMIN_DISPLAYNAME=${ADMIN_DISPLAYNAME:-DataHub} \ No newline at end of file +export 
CYPRESS_ADMIN_DISPLAYNAME=${ADMIN_DISPLAYNAME:-DataHub} diff --git a/smoke-test/set-test-env-vars.sh b/smoke-test/set-test-env-vars.sh new file mode 100644 index 00000000000000..4668721f80de08 --- /dev/null +++ b/smoke-test/set-test-env-vars.sh @@ -0,0 +1,2 @@ +export DATAHUB_KAFKA_SCHEMA_REGISTRY_URL=http://localhost:8080/schema-registry/api +export DATAHUB_GMS_URL=http://localhost:8080 \ No newline at end of file diff --git a/smoke-test/smoke.sh b/smoke-test/smoke.sh index c16865fe1e71ef..5b3e8a9377a6ca 100755 --- a/smoke-test/smoke.sh +++ b/smoke-test/smoke.sh @@ -16,16 +16,23 @@ cd "$DIR" if [ "${RUN_QUICKSTART:-true}" == "true" ]; then source ./run-quickstart.sh +else + mkdir -p ~/.datahub/plugins/frontend/auth/ + echo "test_user:test_pass" >> ~/.datahub/plugins/frontend/auth/user.props + echo "datahub:datahub" > ~/.datahub/plugins/frontend/auth/user.props + + python3 -m venv venv + source venv/bin/activate + python -m pip install --upgrade pip uv>=0.1.10 wheel setuptools + uv pip install -r requirements.txt fi -source venv/bin/activate - (cd ..; ./gradlew :smoke-test:yarnInstall) source ./set-cypress-creds.sh -export DATAHUB_GMS_URL=http://localhost:8080 -export ELASTIC_ID_HASH_ALGO="MD5" +# set environment variables for the test +source ./set-test-env-vars.sh # no_cypress_suite0, no_cypress_suite1, cypress_suite1, cypress_rest if [[ -z "${TEST_STRATEGY}" ]]; then diff --git a/smoke-test/test_e2e.py b/smoke-test/test_e2e.py index abb4841314c4af..74d64a8193173a 100644 --- a/smoke-test/test_e2e.py +++ b/smoke-test/test_e2e.py @@ -21,6 +21,7 @@ get_frontend_session, get_admin_credentials, get_root_urn, + wait_for_writes_to_sync, ) bootstrap_sample_data = "../metadata-ingestion/examples/mce_files/bootstrap_mce.json" @@ -150,11 +151,13 @@ def _ensure_group_not_present(urn: str, frontend_session) -> Any: def test_ingestion_via_rest(wait_for_healthchecks): ingest_file_via_rest(bootstrap_sample_data) _ensure_user_present(urn=get_root_urn()) + 
wait_for_writes_to_sync() @pytest.mark.dependency(depends=["test_healthchecks"]) def test_ingestion_usage_via_rest(wait_for_healthchecks): ingest_file_via_rest(usage_sample_data) + wait_for_writes_to_sync() @pytest.mark.dependency(depends=["test_healthchecks"]) @@ -185,6 +188,7 @@ def test_ingestion_via_kafka(wait_for_healthchecks): # Since Kafka emission is asynchronous, we must wait a little bit so that # the changes are actually processed. time.sleep(kafka_post_ingestion_wait_sec) + wait_for_writes_to_sync() @pytest.mark.dependency( @@ -196,6 +200,7 @@ def test_ingestion_via_kafka(wait_for_healthchecks): ) def test_run_ingestion(wait_for_healthchecks): # Dummy test so that future ones can just depend on this one. + wait_for_writes_to_sync() pass @@ -1384,7 +1389,9 @@ def test_native_user_endpoints(frontend_session): unauthenticated_get_invite_token_response = unauthenticated_session.post( f"{get_frontend_url()}/api/v2/graphql", json=get_invite_token_json ) - assert unauthenticated_get_invite_token_response.status_code == HTTPStatus.UNAUTHORIZED + assert ( + unauthenticated_get_invite_token_response.status_code == HTTPStatus.UNAUTHORIZED + ) unauthenticated_create_reset_token_json = { "query": """mutation createNativeUserResetToken($input: CreateNativeUserResetTokenInput!) 
{\n @@ -1399,7 +1406,10 @@ def test_native_user_endpoints(frontend_session): f"{get_frontend_url()}/api/v2/graphql", json=unauthenticated_create_reset_token_json, ) - assert unauthenticated_create_reset_token_response.status_code == HTTPStatus.UNAUTHORIZED + assert ( + unauthenticated_create_reset_token_response.status_code + == HTTPStatus.UNAUTHORIZED + ) # cleanup steps json = { diff --git a/smoke-test/tests/consistency_utils.py b/smoke-test/tests/consistency_utils.py index 4335e2a874c1e7..1eddc46bb220b7 100644 --- a/smoke-test/tests/consistency_utils.py +++ b/smoke-test/tests/consistency_utils.py @@ -8,14 +8,31 @@ ELASTICSEARCH_REFRESH_INTERVAL_SECONDS: int = int( os.getenv("ELASTICSEARCH_REFRESH_INTERVAL_SECONDS", 5) ) -KAFKA_BROKER_CONTAINER: str = str( - os.getenv("KAFKA_BROKER_CONTAINER", "datahub-broker-1") -) KAFKA_BOOTSTRAP_SERVER: str = str(os.getenv("KAFKA_BOOTSTRAP_SERVER", "broker:29092")) logger = logging.getLogger(__name__) +def infer_kafka_broker_container() -> str: + cmd = "docker ps --format '{{.Names}}' | grep broker" + completed_process = subprocess.run( + cmd, + capture_output=True, + shell=True, + text=True, + ) + result = str(completed_process.stdout) + lines = result.splitlines() + if len(lines) == 0: + raise ValueError("No Kafka broker containers found") + return lines[0] + + +KAFKA_BROKER_CONTAINER: str = str( + os.getenv("KAFKA_BROKER_CONTAINER", infer_kafka_broker_container()) +) + + def wait_for_writes_to_sync(max_timeout_in_sec: int = 120) -> None: if USE_STATIC_SLEEP: time.sleep(ELASTICSEARCH_REFRESH_INTERVAL_SECONDS) @@ -44,7 +61,9 @@ def wait_for_writes_to_sync(max_timeout_in_sec: int = 120) -> None: if maximum_lag == 0: lag_zero = True except ValueError: - logger.warning(f"Error reading kafka lag using command: {cmd}") + logger.warning( + f"Error reading kafka lag using command: {cmd}", exc_info=True + ) if not lag_zero: logger.warning( diff --git a/smoke-test/tests/cypress/cypress/e2e/actions/docPropagation.js 
b/smoke-test/tests/cypress/cypress/e2e/actions/docPropagation.js new file mode 100644 index 00000000000000..3d7e14195ab64f --- /dev/null +++ b/smoke-test/tests/cypress/cypress/e2e/actions/docPropagation.js @@ -0,0 +1,27 @@ +const testId = '[data-testid="docPropagationIndicator"]'; + +describe("docPropagation", () => { + it("logs in and navigates to the schema page and checks for docPropagationIndicator", () => { + cy.login(); + cy.visit( + "/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_deleted,PROD)/Schema?is_lineage_mode=false&schemaFilter=", + "/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_deleted,PROD)/Schema?is_lineage_mode=false&schemaFilter=", + ); + + // verify that the indicator exists in the table + cy.get(testId).should("exist"); + + // click on the table row + cy.get('[data-row-key="user_id"]').click(); + + // verify that the indicator exists in id="entity-profile-sidebar" + cy.get('[id="entity-profile-sidebar"]') + .then(($sidebar) => { + if ($sidebar.find(testId).length) return testId; + return null; + }) + .then((selector) => { + cy.get(selector).should("exist"); + }); + }); +}); diff --git a/smoke-test/tests/cypress/data.json b/smoke-test/tests/cypress/data.json index 5253b7a33b085f..ce61f7c83a0389 100644 --- a/smoke-test/tests/cypress/data.json +++ b/smoke-test/tests/cypress/data.json @@ -96,7 +96,11 @@ }, "nativeDataType": "varchar(100)", "globalTags": { - "tags": [{ "tag": "urn:li:tag:NeedsDocumentation" }] + "tags": [ + { + "tag": "urn:li:tag:NeedsDocumentation" + } + ] }, "recursive": false }, @@ -137,7 +141,11 @@ }, { "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [{ "tag": "urn:li:tag:Cypress" }] + "tags": [ + { + "tag": "urn:li:tag:Cypress" + } + ] } } ] @@ -246,7 +254,13 @@ "editableSchemaFieldInfo": [ { "fieldPath": "shipment_info", - "globalTags": { "tags": [{ "tag": "urn:li:tag:Legacy" }] }, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Legacy" + } + ] + }, "glossaryTerms": 
{ "terms": [ { @@ -401,8 +415,12 @@ { "com.linkedin.pegasus2avro.common.GlobalTags": { "tags": [ - { "tag": "urn:li:tag:Cypress" }, - { "tag": "urn:li:tag:Cypress2" } + { + "tag": "urn:li:tag:Cypress" + }, + { + "tag": "urn:li:tag:Cypress2" + } ] } } @@ -542,7 +560,11 @@ }, { "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [{ "tag": "urn:li:tag:Cypress" }] + "tags": [ + { + "tag": "urn:li:tag:Cypress" + } + ] } } ] @@ -718,7 +740,11 @@ }, { "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [{ "tag": "urn:li:tag:Cypress" }] + "tags": [ + { + "tag": "urn:li:tag:Cypress" + } + ] } } ] @@ -1011,7 +1037,11 @@ }, { "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [{ "tag": "urn:li:tag:Cypress" }] + "tags": [ + { + "tag": "urn:li:tag:Cypress" + } + ] } } ] @@ -1229,7 +1259,11 @@ }, { "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [{ "tag": "urn:li:tag:Cypress" }] + "tags": [ + { + "tag": "urn:li:tag:Cypress" + } + ] } } ] @@ -1279,7 +1313,11 @@ }, { "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [{ "tag": "urn:li:tag:Cypress" }] + "tags": [ + { + "tag": "urn:li:tag:Cypress" + } + ] } } ] @@ -1332,7 +1370,11 @@ }, { "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [{ "tag": "urn:li:tag:Cypress" }] + "tags": [ + { + "tag": "urn:li:tag:Cypress" + } + ] } } ] @@ -1371,7 +1413,11 @@ }, { "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [{ "tag": "urn:li:tag:Cypress" }] + "tags": [ + { + "tag": "urn:li:tag:Cypress" + } + ] } } ] @@ -1413,7 +1459,11 @@ }, { "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [{ "tag": "urn:li:tag:Cypress" }] + "tags": [ + { + "tag": "urn:li:tag:Cypress" + } + ] } } ] @@ -1459,7 +1509,11 @@ }, { "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [{ "tag": "urn:li:tag:Cypress" }] + "tags": [ + { + "tag": "urn:li:tag:Cypress" + } + ] } } ] @@ -1521,7 +1575,11 @@ }, { "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [{ "tag": "urn:li:tag:Cypress" }] + 
"tags": [ + { + "tag": "urn:li:tag:Cypress" + } + ] } } ] @@ -1758,7 +1816,11 @@ }, { "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [{ "tag": "urn:li:tag:CypressFeatureTag" }] + "tags": [ + { + "tag": "urn:li:tag:CypressFeatureTag" + } + ] } } ] @@ -1785,7 +1847,11 @@ }, { "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [{ "tag": "urn:li:tag:CypressPrimaryKeyTag" }] + "tags": [ + { + "tag": "urn:li:tag:CypressPrimaryKeyTag" + } + ] } } ] @@ -2137,5 +2203,17 @@ "contentType": "application/json" }, "systemMetadata": null + }, + { + "auditHeader": null, + "entityType": "schemaField", + "entityUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_deleted,PROD),user_id)", + "changeType": "UPSERT", + "aspectName": "documentation", + "aspect": { + "value": "{\"documentations\":[{\"attribution\":{\"actor\":\"urn:li:corpuser:__datahub_system\",\"source\":\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_created,PROD),user_id)\",\"sourceDetail\":{\"actor\":\"urn:li:corpuser:shirshanka@acryl.io\",\"origin\":\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_created,PROD),user_id)\",\"propagated\":\"true\"},\"time\":1721422917808},\"documentation\":\"Unique identifier of user profile.\"}]}", + "contentType": "application/json" + }, + "systemMetadata": null } ] diff --git a/smoke-test/tests/openapi/test_openapi.py b/smoke-test/tests/openapi/test_openapi.py index 6561ee6d5c5cc9..20398e0e581685 100644 --- a/smoke-test/tests/openapi/test_openapi.py +++ b/smoke-test/tests/openapi/test_openapi.py @@ -64,6 +64,7 @@ def evaluate_test(test_name, test_data): actual_resp.json(), req_resp["response"]["json"], exclude_regex_paths=exclude_regex_paths, + ignore_order=True, ) assert not diff else: @@ -81,11 +82,20 @@ def evaluate_test(test_name, test_data): raise e -def run_tests(fixture_glob, num_workers=3): +def run_tests(fixture_globs, num_workers=3): + """Run every fixture matched by 
``fixture_globs`` on a thread pool. + + ``fixture_globs`` is an iterable of glob patterns; a single pattern + string is also accepted and treated as a one-element list. + """ + if isinstance(fixture_globs, str): + # A bare string would otherwise be iterated character by character. + fixture_globs = [fixture_globs] with
concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor: futures = [] - for test_fixture, test_data in load_tests(fixture_glob=fixture_glob): - futures.append(executor.submit(evaluate_test, test_fixture, test_data)) + for fixture_glob in fixture_globs: + for test_fixture, test_data in load_tests(fixture_glob=fixture_glob): + futures.append(executor.submit(evaluate_test, test_fixture, test_data)) for future in concurrent.futures.as_completed(futures): logger.info(future.result()) @@ -93,7 +103,7 @@ def run_tests(fixture_glob, num_workers=3): @pytest.mark.dependency(depends=["test_healthchecks"]) def test_openapi_all(): - run_tests(fixture_glob="tests/openapi/**/*.json", num_workers=10) + run_tests(fixture_globs=["tests/openapi/*/*.json"], num_workers=10) # @pytest.mark.dependency(depends=["test_healthchecks"])