Skip to content

Commit

Permalink
Merge branch 'master' into feat_snowflake_swap
Browse files Browse the repository at this point in the history
  • Loading branch information
mayurinehate authored Oct 21, 2024
2 parents 3167a42 + 554288b commit cd59907
Show file tree
Hide file tree
Showing 172 changed files with 9,814 additions and 9,196 deletions.
9 changes: 4 additions & 5 deletions .github/workflows/build-and-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,17 +67,16 @@ jobs:
timezoneLinux: ${{ matrix.timezone }}
- name: Check out the repo
uses: acryldata/sane-checkout-action@v3
- uses: actions/setup-python@v5
with:
python-version: "3.10"
cache: pip
- name: Set up JDK 17
uses: actions/setup-java@v4
with:
distribution: "zulu"
java-version: 17
- uses: gradle/actions/setup-gradle@v3
- uses: actions/setup-python@v5
if: ${{ needs.setup.outputs.ingestion_change == 'true' }}
with:
python-version: "3.10"
cache: pip
- name: Gradle build (and test) for NOT metadata ingestion
if: ${{ matrix.command == 'except_metadata_ingestion' && needs.setup.outputs.backend_change == 'true' }}
run: |
Expand Down
42 changes: 42 additions & 0 deletions .github/workflows/contributor-open-pr-comment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Posts a welcome comment, with a link to the author's contributor card,
# when a pull request is opened.
name: PR Comment

on:
  pull_request:
    types: [opened]

# NOTE(review): for pull requests opened from forks, `pull_request` workflows
# normally receive a read-only token, so the comment step may fail there even
# with this permission -- confirm whether fork PRs need to be covered.
permissions:
  pull-requests: write

jobs:
  post-pr-opened-comment:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      # Lower-cases the author's login, replaces spaces with hyphens, and
      # exports the result as FORMATTED_USERNAME for the following step.
      - name: Get and Format Username (PR only)
        if: github.event_name == 'pull_request'
        env:
          # Event data is handed over as an environment variable rather than
          # interpolated into the script text, so it is never parsed as shell.
          PR_USER_LOGIN: ${{ github.event.pull_request.user.login }}
        run: |
          formatted_username=$(echo "$PR_USER_LOGIN" | tr '[:upper:]' '[:lower:]' | sed 's/ /-/g')
          echo "FORMATTED_USERNAME=$formatted_username" >> "$GITHUB_ENV"
      - name: Create Comment (PR only)
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v6
        env:
          # Original (unformatted) login, used for the card image URL below.
          PR_USER_LOGIN: ${{ github.event.pull_request.user.login }}
        with:
          script: |
            if (context.payload.pull_request) {
              // FORMATTED_USERNAME is written to GITHUB_ENV by the previous step.
              const prUser = process.env.FORMATTED_USERNAME;
              const prLogin = process.env.PR_USER_LOGIN;
              const url = `https://contributors.datahubproject.io/${prUser}`;
              const body = `Hello @${prUser} :smile: \n\n Thank you so much for opening a pull request!\n\n![Image](https://contributors.datahubproject.io/api/og?userId=${prLogin})\nYou can check out your contributor card and see all your past stats [here](${url})!`;
              // Create a comment on the PR
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.payload.pull_request.number,
                body: body
              });
            } else {
              console.log('Not a pull request event.');
            }
24 changes: 16 additions & 8 deletions .github/workflows/docker-unified.yml
Original file line number Diff line number Diff line change
Expand Up @@ -760,14 +760,18 @@ jobs:
needs: [setup, datahub_ingestion_base_slim_build]
if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }}
steps:
- name: Check out the repo
uses: acryldata/sane-checkout-action@v3
- uses: actions/setup-python@v5
with:
python-version: "3.10"
cache: "pip"
- name: Set up JDK 17
uses: actions/setup-java@v4
with:
distribution: "zulu"
java-version: 17
- uses: gradle/actions/setup-gradle@v3
- name: Check out the repo
uses: acryldata/sane-checkout-action@v3
- name: Build codegen
if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish =='true' }}
run: ./gradlew :metadata-ingestion:codegen
Expand Down Expand Up @@ -852,14 +856,18 @@ jobs:
needs: [setup, datahub_ingestion_base_full_build]
if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }}
steps:
- name: Check out the repo
uses: acryldata/sane-checkout-action@v3
- uses: actions/setup-python@v5
with:
python-version: "3.10"
cache: "pip"
- name: Set up JDK 17
uses: actions/setup-java@v4
with:
distribution: "zulu"
java-version: 17
- uses: gradle/actions/setup-gradle@v3
- name: Check out the repo
uses: acryldata/sane-checkout-action@v3
- name: Build codegen
if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }}
run: ./gradlew :metadata-ingestion:codegen
Expand Down Expand Up @@ -983,16 +991,16 @@ jobs:
run: df -h . && docker images
- name: Check out the repo
uses: acryldata/sane-checkout-action@v3
- uses: actions/setup-python@v5
with:
python-version: "3.10"
cache: "pip"
- name: Set up JDK 17
uses: actions/setup-java@v4
with:
distribution: "zulu"
java-version: 17
- uses: gradle/actions/setup-gradle@v3
- uses: actions/setup-python@v5
with:
python-version: "3.10"
cache: "pip"
- name: Login to DockerHub
uses: docker/login-action@v3
if: ${{ needs.setup.outputs.docker-login == 'true' }}
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/metadata-ingestion.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ concurrency:
jobs:
metadata-ingestion:
runs-on: ubuntu-latest
timeout-minutes: 40
env:
SPARK_VERSION: 3.3.2
DATAHUB_TELEMETRY_ENABLED: false
Expand Down
9 changes: 4 additions & 5 deletions .github/workflows/metadata-io.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,17 +57,16 @@ jobs:
- name: Disk Check
run: df -h . && docker images
- uses: acryldata/sane-checkout-action@v3
- uses: actions/setup-python@v5
with:
python-version: "3.10"
cache: "pip"
- name: Set up JDK 17
uses: actions/setup-java@v4
with:
distribution: "zulu"
java-version: 17
- uses: gradle/actions/setup-gradle@v3
- uses: actions/setup-python@v5
if: ${{ needs.setup.outputs.ingestion_change == 'true' }}
with:
python-version: "3.10"
cache: "pip"
- name: Gradle build (and test)
run: |
./gradlew :metadata-io:test
Expand Down
11 changes: 10 additions & 1 deletion datahub-web-react/src/app/ingest/source/builder/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import csvLogo from '../../../../images/csv-logo.png';
import qlikLogo from '../../../../images/qliklogo.png';
import sigmaLogo from '../../../../images/sigmalogo.png';
import sacLogo from '../../../../images/saclogo.svg';
import datahubLogo from '../../../../images/datahublogo.png';

export const ATHENA = 'athena';
export const ATHENA_URN = `urn:li:dataPlatform:${ATHENA}`;
Expand Down Expand Up @@ -125,6 +126,11 @@ export const SIGMA = 'sigma';
export const SIGMA_URN = `urn:li:dataPlatform:${SIGMA}`;
export const SAC = 'sac';
export const SAC_URN = `urn:li:dataPlatform:${SAC}`;
export const DATAHUB = 'datahub';
export const DATAHUB_GC = 'datahub-gc';
export const DATAHUB_LINEAGE_FILE = 'datahub-lineage-file';
export const DATAHUB_BUSINESS_GLOSSARY = 'datahub-business-glossary';
export const DATAHUB_URN = `urn:li:dataPlatform:${DATAHUB}`;

export const PLATFORM_URN_TO_LOGO = {
[ATHENA_URN]: athenaLogo,
Expand Down Expand Up @@ -165,6 +171,7 @@ export const PLATFORM_URN_TO_LOGO = {
[QLIK_SENSE_URN]: qlikLogo,
[SIGMA_URN]: sigmaLogo,
[SAC_URN]: sacLogo,
[DATAHUB_URN]: datahubLogo,
};

export const SOURCE_TO_PLATFORM_URN = {
Expand All @@ -178,5 +185,7 @@ export const SOURCE_TO_PLATFORM_URN = {
[SNOWFLAKE_USAGE]: SNOWFLAKE_URN,
[STARBURST_TRINO_USAGE]: TRINO_URN,
[DBT_CLOUD]: DBT_URN,
[VERTICA]: VERTICA_URN,
[DATAHUB_GC]: DATAHUB_URN,
[DATAHUB_LINEAGE_FILE]: DATAHUB_URN,
[DATAHUB_BUSINESS_GLOSSARY]: DATAHUB_URN,
};
3 changes: 3 additions & 0 deletions docker/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,9 @@ dockerCompose {
isRequiredBy(tasks.named('quickstartDebug'))
composeAdditionalArgs = ['--profile', 'debug']

if (System.getenv().containsKey("DATAHUB_VERSION")) {
environment.put 'DATAHUB_VERSION', System.getenv("DATAHUB_VERSION")
}
environment.put 'DATAHUB_TELEMETRY_ENABLED', 'false' // disabled when built locally

useComposeFiles = ['profiles/docker-compose.yml']
Expand Down
2 changes: 1 addition & 1 deletion docker/profiles/docker-compose.frontend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ x-datahub-frontend-service: &datahub-frontend-service

x-datahub-frontend-service-dev: &datahub-frontend-service-dev
<<: *datahub-frontend-service
image: ${DATAHUB_FRONTEND_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-frontend-react}:debug
image: ${DATAHUB_FRONTEND_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-frontend-react}:${DATAHUB_VERSION:-debug}
ports:
- ${DATAHUB_MAPPED_FRONTEND_DEBUG_PORT:-5002}:5002
- ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002
Expand Down
8 changes: 4 additions & 4 deletions docker/profiles/docker-compose.gms.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ x-datahub-system-update-service: &datahub-system-update-service

x-datahub-system-update-service-dev: &datahub-system-update-service-dev
<<: *datahub-system-update-service
image: ${DATAHUB_UPGRADE_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-upgrade}:debug
image: ${DATAHUB_UPGRADE_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-upgrade}:${DATAHUB_VERSION:-debug}
ports:
- ${DATAHUB_MAPPED_UPGRADE_DEBUG_PORT:-5003}:5003
environment: &datahub-system-update-dev-env
Expand Down Expand Up @@ -115,7 +115,7 @@ x-datahub-gms-service: &datahub-gms-service

x-datahub-gms-service-dev: &datahub-gms-service-dev
<<: *datahub-gms-service
image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:debug
image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:${DATAHUB_VERSION:-debug}
ports:
- ${DATAHUB_MAPPED_GMS_DEBUG_PORT:-5001}:5001
- ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080
Expand Down Expand Up @@ -159,7 +159,7 @@ x-datahub-mae-consumer-service: &datahub-mae-consumer-service

x-datahub-mae-consumer-service-dev: &datahub-mae-consumer-service-dev
<<: *datahub-mae-consumer-service
image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:debug
image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:${DATAHUB_VERSION:-debug}
environment:
<<: [*datahub-dev-telemetry-env, *datahub-mae-consumer-env]
volumes:
Expand All @@ -185,7 +185,7 @@ x-datahub-mce-consumer-service: &datahub-mce-consumer-service

x-datahub-mce-consumer-service-dev: &datahub-mce-consumer-service-dev
<<: *datahub-mce-consumer-service
image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:debug
image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:${DATAHUB_VERSION:-debug}
environment:
<<: [*datahub-dev-telemetry-env, *datahub-mce-consumer-env]
volumes:
Expand Down
10 changes: 5 additions & 5 deletions docker/profiles/docker-compose.prerequisites.yml
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ services:
mysql-setup-dev:
<<: *mysql-setup
profiles: *mysql-profiles-dev
image: ${DATAHUB_MYSQL_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mysql-setup}:debug
image: ${DATAHUB_MYSQL_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mysql-setup}:${DATAHUB_VERSION:-debug}
postgres:
profiles: *postgres-profiles
hostname: postgres
Expand Down Expand Up @@ -166,7 +166,7 @@ services:
postgres-setup-dev:
<<: *postgres-setup
profiles: *postgres-profiles-dev
image: ${DATAHUB_POSTGRES_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-postgres-setup}:debug
image: ${DATAHUB_POSTGRES_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-postgres-setup}:${DATAHUB_VERSION:-debug}
cassandra:
profiles: *cassandra-profiles
hostname: cassandra
Expand Down Expand Up @@ -272,7 +272,7 @@ services:
environment:
<<: *kafka-setup-env
DATAHUB_PRECREATE_TOPICS: ${DATAHUB_PRECREATE_TOPICS:-true}
image: ${DATAHUB_KAFKA_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-kafka-setup}:debug
image: ${DATAHUB_KAFKA_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-kafka-setup}:${DATAHUB_VERSION:-debug}
elasticsearch:
profiles: *elasticsearch-profiles
hostname: search
Expand All @@ -296,7 +296,7 @@ services:
volumes:
- esdata:/usr/share/elasticsearch/data
elasticsearch-setup-dev: &elasticsearch-setup-dev
image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-elasticsearch-setup}:debug
image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-debug}
profiles: *elasticsearch-profiles
hostname: elasticsearch-setup
env_file: elasticsearch-setup/env/docker.env
Expand Down Expand Up @@ -347,7 +347,7 @@ services:
<<: *opensearch-setup
profiles: *opensearch-profiles-dev
hostname: opensearch-setup-dev
image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-elasticsearch-setup}:debug
image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-debug}
environment:
<<: *search-datastore-environment
USE_AWS_ELASTICSEARCH: ${USE_AWS_ELASTICSEARCH:-true}
Expand Down
8 changes: 8 additions & 0 deletions docs-website/docusaurus.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,14 @@ module.exports = {
// isCloseable: false,
// },
// }),
announcementBar: {
id: "announcement-2",
content:
'<div style="display: flex; justify-content: center; align-items: center;width: 100%;"><!--img src="/img/acryl-logo-white-mark.svg" / --><div style="font-size: .8rem; font-weight: 600; background-color: white; color: #111; padding: 0px 8px; border-radius: 4px; margin-right:12px;">NEW</div><p><span>Join us at Metadata & AI Summit, Oct. 29 & 30!</span></p><a href="http://www.acryldata.io/conference?utm_source=datahub_web&utm_medium=metadata_ai_2024&utm_campaign=home_banner" target="_blank" class="button">Register →</a></div>',
backgroundColor: "#111",
textColor: "#ffffff",
isCloseable: false,
},
colorMode: {
// Only support light mode.
defaultMode: 'light',
Expand Down
2 changes: 1 addition & 1 deletion docs-website/src/pages/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ function Home() {
return !siteConfig.customFields.isSaas ? (
<Layout
title={siteConfig.tagline}
description="DataHub is a data discovery application built on an extensible data catalog that helps you tame the complexity of diverse data ecosystems."
description="DataHub is a metadata management platform, spanning data discovery, observability and governance. It helps you tame the complexity of diverse data ecosystems."
>
{isTourModalVisible ? (
<div className="tourModal">
Expand Down
5 changes: 3 additions & 2 deletions docs-website/src/styles/global.scss
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
--ifm-navbar-item-padding-horizontal: 1rem;

/* Announcement Bar */
--docusaurus-announcement-bar-height: 60px !important;
--docusaurus-announcement-bar-height: 48px !important;

/* Rule */
--ifm-hr-border-width: 1px 0 0 0;
Expand Down Expand Up @@ -141,8 +141,9 @@ div[class^="announcementBar"] {
}

a {
color: var(--ifm-button-color);
color: #EFB300;
text-decoration: none;
font-size: 1rem
}
}
}
Expand Down
12 changes: 12 additions & 0 deletions docs/advanced/mcp-mcl.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,18 @@ To mitigate these downsides, we are committed to providing cross-language client

Ultimately, we intend to realize a state in which the Entities and Aspect schemas can be altered without requiring generated code and without maintaining a single mega-model schema (looking at you, Snapshot.pdl). The intention is that changes to the metadata model become even easier than they are today.

### Synchronous Ingestion Architecture

<p align="center">
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/advanced/mcp-mcl/sync-ingestion.svg"/>
</p>

### Asynchronous Ingestion Architecture

<p align="center">
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/advanced/mcp-mcl/async-ingestion.svg"/>
</p>

## Modeling

A Metadata Change Proposal is defined (in PDL) as follows
Expand Down
1 change: 0 additions & 1 deletion docs/businessattributes.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ Taking the example of "United States- Social Security Number", if an application
What you need to create/update and associate business attributes to dataset schema field

* **Manage Business Attributes** platform privilege to create/update/delete business attributes.
* **Edit Dataset Column Business Attribute** metadata privilege to associate business attributes to dataset schema field.

## Using Business Attributes
As of now Business Attributes can only be created through UI
Expand Down
19 changes: 19 additions & 0 deletions docs/how/updating-datahub.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,31 @@ This file documents any backwards-incompatible changes in DataHub and assists pe

## Next

- #11560 - The PowerBI ingestion source configuration option `include_workspace_name_in_dataset_urn` determines whether the workspace name is included in the PowerBI dataset's URN.<br/> PowerBI allows semantic models and their tables to have identical names across workspaces, so without the workspace name in the URN, a multi-workspace ingestion will overwrite the semantic model.<br/>
Entity urn with `include_workspace_name_in_dataset_urn: false`
```
urn:li:dataset:(urn:li:dataPlatform:powerbi,[<PlatformInstance>.]<SemanticModelName>.<TableName>,<ENV>)
```

Entity urn with `include_workspace_name_in_dataset_urn: true`
```
urn:li:dataset:(urn:li:dataPlatform:powerbi,[<PlatformInstance>.]<WorkspaceName>.<SemanticModelName>.<TableName>,<ENV>)
```

The config `include_workspace_name_in_dataset_urn` defaults to `false` for backward compatibility. However, we recommend enabling this flag after performing the necessary cleanup.
If stateful ingestion is enabled, running ingestion with the latest CLI version will handle the cleanup automatically. Otherwise, we recommend soft deleting all powerbi data via the DataHub CLI:
`datahub delete --platform powerbi --soft` and then re-ingest with the latest CLI version, ensuring the `include_workspace_name_in_dataset_urn` configuration is set to true.

### Breaking Changes

- #11486 - Deprecated Criterion filters using `value`. Use `values` instead. This also deprecates the ability to use comma delimited string to represent multiple values using `value`.
- #11484 - Metadata service authentication enabled by default
- #11484 - Rest API authorization enabled by default
- #10472 - `SANDBOX` added as a FabricType. No rollbacks allowed once metadata with this fabric type is added without manual cleanups in databases.
- #11619 - schema field/column paths can no longer be empty strings
- #11619 - schema field/column paths can no longer be duplicated within the schema
- #11570 - The `DatahubClientConfig`'s server field no longer defaults to `http://localhost:8080`. Be sure to explicitly set this.
- #11570 - If a `datahub_api` is explicitly passed to a stateful ingestion config provider, it will be used. We previously ignored it if the pipeline context also had a graph object.

### Potential Downtime

Expand Down
Loading

0 comments on commit cd59907

Please sign in to comment.