From b252f782c56327175a1a0bddc95b5c417db285a1 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 14 Jan 2025 17:04:49 -0800 Subject: [PATCH 01/10] feat(build): use remote gradle cache (#12344) --- settings.gradle | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/settings.gradle b/settings.gradle index 77d0706549a439..437a353f210ac4 100644 --- a/settings.gradle +++ b/settings.gradle @@ -79,6 +79,20 @@ include ':metadata-service:openapi-servlet:models' include ':metadata-integration:java:datahub-schematron:lib' include ':metadata-integration:java:datahub-schematron:cli' +buildCache { + def depotSecret = System.getenv('DEPOT_TOKEN') + + remote(HttpBuildCache) { + url = 'https://cache.depot.dev' + enabled = depotSecret != null && !depotSecret.trim().isEmpty() // an unset or blank DEPOT_TOKEN disables the remote cache + push = true + credentials { + username = '' + password = depotSecret + } + } +} + def installPreCommitHooks() { def preCommitInstalled = false try { @@ -116,7 +130,7 @@ def installPreCommitHooks() { def stderr = new StringBuilder() installHooksProcess.waitForProcessOutput(stdout, stderr) if (installHooksProcess.exitValue() != 0) { - println "Failed to install hooks: ${stderr}" + println "Failed to install hooks: ${stdout}${stderr}" // report both captured streams return } println "Hooks output: ${stdout}" From a0575329848d65eafb455a3f400e8f47bc7e9bb7 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 14 Jan 2025 19:35:36 -0600 Subject: [PATCH 02/10] feat(docker-profiles): version mixing & docs (#12342) --- docker/build.gradle | 6 +----- docker/profiles/README.md | 28 +++++++++++++++++++++++++- docker/profiles/docker-compose.gms.yml | 16 +++++++-------- 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/docker/build.gradle b/docker/build.gradle index 576e47a53e6ef5..0070d814286cf0 100644 --- a/docker/build.gradle +++ b/docker/build.gradle @@ -42,7 +42,6 @@ ext { modules: python_services_modules + backend_profile_modules + [':datahub-frontend'], isDebug: true ], -
'quickstartDebugConsumers': [ profile: 'debug-consumers', modules: python_services_modules + backend_profile_modules + [':datahub-frontend', @@ -50,7 +49,6 @@ ext { ':metadata-jobs:mae-consumer-job'], isDebug: true ], - 'quickstartPg': [ profile: 'quickstart-postgres', modules: (backend_profile_modules - [':docker:mysql-setup']) + [ @@ -108,9 +106,7 @@ dockerCompose { } // Common environment variables - environment.put 'DATAHUB_VERSION', config.isDebug ? - System.getenv("DATAHUB_VERSION") ?: "v${version}" : - "v${version}" + environment.put 'DATAHUB_VERSION', System.getenv("DATAHUB_VERSION") ?: "v${version}" environment.put 'DATAHUB_TELEMETRY_ENABLED', 'false' environment.put "METADATA_TESTS_ENABLED", "true" environment.put "DATAHUB_REPO", "${docker_registry}" diff --git a/docker/profiles/README.md b/docker/profiles/README.md index fb3c9e3c84a7a2..192fde3130a895 100644 --- a/docker/profiles/README.md +++ b/docker/profiles/README.md @@ -101,4 +101,30 @@ Runs everything except for the GMS. Useful for running just a local (non-docker) | debug-cassandra | | | X | | X | X | X | X | | | X | X | | | debug-consumers | X | | | | X | X | X | X | X | X | X | X | | | debug-neo4j | X | | | X | X | X | X | X | | | X | X | | -| debug-elasticsearch | X | | | | X | X | X | X | | | X | | X | \ No newline at end of file +| debug-elasticsearch | X | | | | X | X | X | X | | | X | | X | + +## Advanced Setups + +### Version Mixing + +In some cases, it might be useful to debug upgrade scenarios where there are intentional version mismatches. It is possible +to override individual component versions (`DATAHUB_GMS_VERSION`, `DATAHUB_MAE_VERSION`, `DATAHUB_MCE_VERSION`, `DATAHUB_UPDATE_VERSION`). + +Note: This only works for `non-debug` profiles because of the file mounts when in `debug` which would run older containers +but still pick up the latest application jars.
+ +In this example we are interested in upgrading two components (the `mae-consumer` and the `mce-consumer`) to a fresh build `v0.15.1-SNAPSHOT` +while maintaining older components on `v0.14.1` (especially the `system-update` container). + +This configuration reproduces the situation where the consumers were upgraded prior to running the latest version of `system-update`. In this +scenario we expect the consumers to block their startup waiting for the successful completion of a newer `system-update`. + +- `DATAHUB_VERSION` - specifies the default component version of `v0.14.1` +- `DATAHUB_MAE_VERSION` - specifies an override of just the `mae-consumer` to version `v0.15.1-SNAPSHOT`[1] +- `DATAHUB_MCE_VERSION` - specifies an override of just the `mce-consumer` to version `v0.15.1-SNAPSHOT`[1] + +```shell + DATAHUB_MAE_VERSION="v0.15.1-SNAPSHOT" DATAHUB_MCE_VERSION="v0.15.1-SNAPSHOT" DATAHUB_VERSION="v0.14.1" ./gradlew quickstart +``` + +[1] The `v0.15.1-SNAPSHOT` images were built locally prior to running the command.
diff --git a/docker/profiles/docker-compose.gms.yml b/docker/profiles/docker-compose.gms.yml index ada7df51e20bef..2147d6b5a0247f 100644 --- a/docker/profiles/docker-compose.gms.yml +++ b/docker/profiles/docker-compose.gms.yml @@ -54,7 +54,7 @@ x-datahub-dev-telemetry-env: &datahub-dev-telemetry-env ################################# x-datahub-system-update-service: &datahub-system-update-service hostname: datahub-system-update - image: ${DATAHUB_UPGRADE_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-upgrade}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_UPGRADE_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-upgrade}:${DATAHUB_UPDATE_VERSION:-${DATAHUB_VERSION:-head}} command: - -u - SystemUpdate @@ -73,7 +73,7 @@ x-datahub-system-update-service: &datahub-system-update-service x-datahub-system-update-service-dev: &datahub-system-update-service-dev <<: *datahub-system-update-service - image: ${DATAHUB_UPGRADE_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-upgrade}:${DATAHUB_VERSION:-debug} + image: ${DATAHUB_UPGRADE_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-upgrade}:${DATAHUB_UPDATE_VERSION:-${DATAHUB_VERSION:-debug}} ports: - ${DATAHUB_MAPPED_UPGRADE_DEBUG_PORT:-5003}:5003 environment: &datahub-system-update-dev-env @@ -92,7 +92,7 @@ x-datahub-system-update-service-dev: &datahub-system-update-service-dev ################################# x-datahub-gms-service: &datahub-gms-service hostname: datahub-gms - image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:${DATAHUB_GMS_VERSION:-${DATAHUB_VERSION:-head}} ports: - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 env_file: @@ -118,7 +118,7 @@ x-datahub-gms-service: &datahub-gms-service x-datahub-gms-service-dev: &datahub-gms-service-dev <<: *datahub-gms-service - image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:${DATAHUB_VERSION:-debug} + image: 
${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:${DATAHUB_GMS_VERSION:-${DATAHUB_VERSION:-debug}} ports: - ${DATAHUB_MAPPED_GMS_DEBUG_PORT:-5001}:5001 - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 @@ -150,7 +150,7 @@ x-datahub-gms-service-dev: &datahub-gms-service-dev ################################# x-datahub-mae-consumer-service: &datahub-mae-consumer-service hostname: datahub-mae-consumer - image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:${DATAHUB_MAE_VERSION:-${DATAHUB_VERSION:-head}} ports: - 9091:9091 env_file: @@ -163,7 +163,7 @@ x-datahub-mae-consumer-service: &datahub-mae-consumer-service x-datahub-mae-consumer-service-dev: &datahub-mae-consumer-service-dev <<: *datahub-mae-consumer-service - image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:${DATAHUB_VERSION:-debug} + image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:${DATAHUB_MAE_VERSION:-${DATAHUB_VERSION:-debug}} environment: <<: [*datahub-dev-telemetry-env, *datahub-mae-consumer-env] ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} @@ -178,7 +178,7 @@ x-datahub-mae-consumer-service-dev: &datahub-mae-consumer-service-dev ################################# x-datahub-mce-consumer-service: &datahub-mce-consumer-service hostname: datahub-mce-consumer - image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:${DATAHUB_MCE_VERSION:-${DATAHUB_VERSION:-head}} ports: - 9090:9090 env_file: @@ -193,7 +193,7 @@ x-datahub-mce-consumer-service: &datahub-mce-consumer-service x-datahub-mce-consumer-service-dev: &datahub-mce-consumer-service-dev <<: *datahub-mce-consumer-service - image: 
${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:${DATAHUB_VERSION:-debug} + image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:${DATAHUB_MCE_VERSION:-${DATAHUB_VERSION:-debug}} environment: <<: [*datahub-dev-telemetry-env, *datahub-mce-consumer-env] ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} From 3905c8ee4146c93a06653dbcd690775ae36bef0f Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 14 Jan 2025 19:36:02 -0600 Subject: [PATCH 03/10] docs(async-api): addition to known issues (#12339) --- docs/how/updating-datahub.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 68b41c907c6ad6..eb5a792216d981 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -1,3 +1,8 @@ +# Known Issues + +- Async APIs - DataHub's asynchronous APIs perform only basic schema validation when receiving MCP requests, similar to direct production to MCP Kafka topics. While requests must conform to the MCP schema to be accepted, actual processing happens later in the pipeline. Any processing failures that occur after the initial acceptance are captured in the Failed MCP topic, but these failures are not immediately surfaced to the API caller since they happen asynchronously. + + # Updating DataHub