Skip to content

Commit

Permalink
Merge branch 'master' into feat-add-iceberg-lineage-support-in-snowflake
Browse files Browse the repository at this point in the history
  • Loading branch information
alisa-aylward-toast authored Aug 22, 2024
2 parents b7540e6 + 0927c63 commit ab20013
Show file tree
Hide file tree
Showing 68 changed files with 4,363 additions and 1,167 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/build-and-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ jobs:
-x :metadata-ingestion-modules:airflow-plugin:check \
-x :metadata-ingestion-modules:dagster-plugin:build \
-x :metadata-ingestion-modules:dagster-plugin:check \
-x :metadata-ingestion-modules:gx-plugin:build \
-x :metadata-ingestion-modules:gx-plugin:check \
-x :datahub-frontend:build \
-x :datahub-web-react:build \
--parallel
Expand Down
87 changes: 87 additions & 0 deletions .github/workflows/gx-plugin.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# CI workflow for the GX plugin module: lints and runs the quick test suite
# against several Python / great-expectations combinations.
name: GX Plugin
on:
  push:
    branches:
      - master
    paths:
      - ".github/workflows/gx-plugin.yml"
      - "metadata-ingestion-modules/gx-plugin/**"
      - "metadata-ingestion/**"
      - "metadata-models/**"
  pull_request:
    branches:
      - master
    paths:
      - ".github/**"
      - "metadata-ingestion-modules/gx-plugin/**"
      - "metadata-ingestion/**"
      - "metadata-models/**"
  release:
    types: [published]

# One active run per pull request (or per ref when there is no PR); a newer
# run in the same group cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  gx-plugin:
    runs-on: ubuntu-latest
    env:
      SPARK_VERSION: 3.0.3
      DATAHUB_TELEMETRY_ENABLED: false
    strategy:
      matrix:
        python-version: ["3.8", "3.10"]
        # The "3.8" and "3.10" entries attach extraPythonRequirement to the
        # combinations listed above. The "3.11" entry matches no listed
        # version, so it adds a third job to the matrix.
        include:
          - python-version: "3.8"
            extraPythonRequirement: "great-expectations~=0.15.12"
          - python-version: "3.10"
            extraPythonRequirement: "great-expectations~=0.16.0 numpy~=1.26.0"
          - python-version: "3.11"
            extraPythonRequirement: "great-expectations~=0.17.0"
      # Let the remaining matrix legs finish even if one of them fails.
      fail-fast: false
    steps:
      - name: Set up JDK 17
        uses: actions/setup-java@v3
        with:
          distribution: "zulu"
          java-version: 17
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
          cache: "pip"
      - name: Install dependencies
        run: ./metadata-ingestion/scripts/install_deps.sh
      - name: Install GX package and test (extras ${{ matrix.extraPythonRequirement }})
        run: ./gradlew -Pextra_pip_requirements='${{ matrix.extraPythonRequirement }}' :metadata-ingestion-modules:gx-plugin:lint :metadata-ingestion-modules:gx-plugin:testQuick
      # Runs even after a failed step so the resolved package set is in the log.
      - name: pip freeze show list installed
        if: always()
        run: source metadata-ingestion-modules/gx-plugin/venv/bin/activate && pip freeze
      # Test reports are uploaded from the Python 3.11 leg only.
      - uses: actions/upload-artifact@v3
        if: ${{ always() && matrix.python-version == '3.11' && matrix.extraPythonRequirement == 'great-expectations~=0.17.0' }}
        with:
          name: Test Results (GX Plugin ${{ matrix.python-version}})
          path: |
            **/build/reports/tests/test/**
            **/build/test-results/test/**
            **/junit.*.xml
      - name: Upload coverage to Codecov
        if: always()
        uses: codecov/codecov-action@v3
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          directory: .
          fail_ci_if_error: false
          # Codecov flags may contain only alphanumerics, "_", "-" and "."
          # (max 45 characters). extraPythonRequirement contains "~", "=" and
          # spaces, so the flag is keyed on the Python version alone, which is
          # already unique per matrix leg.
          flags: gx-${{ matrix.python-version }}
          name: pytest-gx
          verbose: true

  # Publishes the triggering event payload as an artifact.
  # NOTE(review): presumably consumed by the "Test Results" workflow that
  # runs on completion of this one - confirm.
  event-file:
    runs-on: ubuntu-latest
    steps:
      - name: Upload
        uses: actions/upload-artifact@v3
        with:
          name: Event File
          path: ${{ github.event_path }}
2 changes: 1 addition & 1 deletion .github/workflows/test-results.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: Test Results

on:
workflow_run:
workflows: ["build & test", "metadata ingestion", "Airflow Plugin", "Dagster Plugin"]
workflows: ["build & test", "metadata ingestion", "Airflow Plugin", "Dagster Plugin", "GX Plugin"]
types:
- completed

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ public static FormActorAssignment mapFormActorAssignment(
if (input.getGroups() != null) {
UrnArray groupUrns = new UrnArray();
input.getGroups().forEach(group -> groupUrns.add(UrnUtils.getUrn(group)));
result.setUsers(groupUrns);
result.setGroups(groupUrns);
}

return result;
Expand Down
2 changes: 1 addition & 1 deletion datahub-upgrade/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ dependencies {
// mock internal schema registry
implementation externalDependency.kafkaAvroSerde
implementation externalDependency.kafkaAvroSerializer
implementation "org.apache.kafka:kafka_2.12:3.7.0"
implementation "org.apache.kafka:kafka_2.12:3.7.1"

implementation externalDependency.slf4jApi
compileOnly externalDependency.lombok
Expand Down
2 changes: 1 addition & 1 deletion docker/kafka-setup/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ ARG ALPINE_REPO_URL
ARG APACHE_DOWNLOAD_URL
ARG GITHUB_REPO_URL

ENV KAFKA_VERSION=3.7.0
ENV KAFKA_VERSION=3.7.1
ENV SCALA_VERSION=2.13

LABEL name="kafka" version=${KAFKA_VERSION}
Expand Down
1 change: 1 addition & 0 deletions docs-website/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ task yarnGenerate(type: YarnTask, dependsOn: [yarnInstall,
':metadata-ingestion:buildWheel',
':metadata-ingestion-modules:airflow-plugin:buildWheel',
':metadata-ingestion-modules:dagster-plugin:buildWheel',
':metadata-ingestion-modules:gx-plugin:buildWheel',
]) {
inputs.files(projectMdFiles)
outputs.cacheIf { true }
Expand Down
1 change: 1 addition & 0 deletions docs-website/generateDocsDir.ts
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,7 @@ function copy_python_wheels(): void {
"../metadata-ingestion/dist",
"../metadata-ingestion-modules/airflow-plugin/dist",
"../metadata-ingestion-modules/dagster-plugin/dist",
"../metadata-ingestion-modules/gx-plugin/dist",
];

const wheel_output_directory = path.join(STATIC_DIRECTORY, "wheels");
Expand Down
1 change: 1 addition & 0 deletions docs-website/sidebars.js
Original file line number Diff line number Diff line change
Expand Up @@ -917,6 +917,7 @@ module.exports = {
// "metadata-integration/java/openlineage-converter/README"
//"metadata-ingestion-modules/airflow-plugin/README"
//"metadata-ingestion-modules/dagster-plugin/README"
//"metadata-ingestion-modules/gx-plugin/README"
// "metadata-ingestion/schedule_docs/datahub", // we can delete this
// TODO: change the titles of these, removing the "What is..." portion from the sidebar"
// "docs/what/entity",
Expand Down
4 changes: 3 additions & 1 deletion docs/quick-ingestion-guides/bigquery/setup.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@ Please refer to the BigQuery [Permissions](https://cloud.google.com/iam/docs/per
You can always add/remove roles to Service Accounts later on. Please refer to the BigQuery [Manage access to projects, folders, and organizations](https://cloud.google.com/iam/docs/granting-changing-revoking-access) guide for more details.
:::

3. Create and download a [Service Account Key](https://cloud.google.com/iam/docs/creating-managing-service-account-keys). We will use this to set up authentication within DataHub.
3. To filter projects based on the `project_labels` configuration, first visit [cloudresourcemanager.googleapis.com](https://console.developers.google.com/apis/api/cloudresourcemanager.googleapis.com/overview) and enable the `Cloud Resource Manager API`.

4. Create and download a [Service Account Key](https://cloud.google.com/iam/docs/creating-managing-service-account-keys). We will use this to set up authentication within DataHub.

The key file looks like this:

Expand Down
4 changes: 3 additions & 1 deletion metadata-ingestion-modules/airflow-plugin/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ def get_long_description():

_version: str = package_metadata["__version__"]
_self_pin = (
f"=={_version}" if not (_version.endswith("dev0") or "docker" in _version) else ""
f"=={_version}"
if not (_version.endswith(("dev0", "dev1")) or "docker" in _version)
else ""
)


Expand Down
14 changes: 3 additions & 11 deletions metadata-ingestion-modules/dagster-plugin/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ task installPackage(type: Exec, dependsOn: [environmentSetup, ':metadata-ingesti
outputs.file(sentinel_file)
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"uv pip install -e . ${extra_pip_requirements} && " +
"${pip_install_command} -e . ${extra_pip_requirements} && " +
"touch ${sentinel_file}"
}

Expand All @@ -45,15 +45,11 @@ task installDev(type: Exec, dependsOn: [install]) {
outputs.file(sentinel_file)
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"uv pip install -e .[dev] ${extra_pip_requirements} && " +
"${pip_install_command} -e .[dev] ${extra_pip_requirements} && " +
"touch ${sentinel_file}"
}

task lint(type: Exec, dependsOn: installDev) {
/*
The find/sed combo below is a temporary work-around for the following mypy issue with airflow 2.2.0:
"venv/lib/python3.8/site-packages/airflow/_vendor/connexion/spec.py:169: error: invalid syntax".
*/
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"black --check --diff src/ tests/ examples/ && " +
Expand All @@ -77,7 +73,7 @@ task installDevTest(type: Exec, dependsOn: [installDev]) {
outputs.file(sentinel_file)
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"uv pip install -e .[dev,integration-tests] ${extra_pip_requirements} && " +
"${pip_install_command} -e .[dev,integration-tests] ${extra_pip_requirements} && " +
"touch ${sentinel_file}"
}

Expand Down Expand Up @@ -105,10 +101,6 @@ task testQuick(type: Exec, dependsOn: installDevTest) {
}


task testFull(type: Exec, dependsOn: [testQuick, installDevTest]) {
commandLine 'bash', '-x', '-c',
"source ${venv_name}/bin/activate && pytest -m 'not slow_integration' -vv --continue-on-collection-errors --junit-xml=junit.full.xml"
}
task buildWheel(type: Exec, dependsOn: [environmentSetup]) {
commandLine 'bash', '-c', "source ${venv_name}/bin/activate && " +
'uv pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_INSTALL=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh'
Expand Down
8 changes: 4 additions & 4 deletions metadata-ingestion-modules/dagster-plugin/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,17 @@ def get_long_description():

_version: str = package_metadata["__version__"]
_self_pin = (
f"=={_version}" if not (_version.endswith("dev0") or "docker" in _version) else ""
f"=={_version}"
if not (_version.endswith(("dev0", "dev1")) or "docker" in _version)
else ""
)

base_requirements = {
# Actual dependencies.
"dagster >= 1.3.3",
"dagit >= 1.3.3",
*rest_common,
# Ignoring the dependency below because it causes issues with the vercel built wheel install
# f"acryl-datahub[datahub-rest]{_self_pin}",
"acryl-datahub[datahub-rest]",
f"acryl-datahub[datahub-rest]{_self_pin}",
}

mypy_stubs = {
Expand Down
143 changes: 143 additions & 0 deletions metadata-ingestion-modules/gx-plugin/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
.envrc
src/datahub_gx_plugin/__init__.py.bak
.vscode/
output
pvenv36/
bq_credentials.json
/tmp
*.bak

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# Generated classes
src/datahub/metadata/
wheels/
junit.quick.xml
4 changes: 4 additions & 0 deletions metadata-ingestion-modules/gx-plugin/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# DataHub GX Plugin

See the DataHub GX docs for details.

Loading

0 comments on commit ab20013

Please sign in to comment.