Skip to content

Commit

Permalink
more aggressive caching w/ gha mode=max
Browse files Browse the repository at this point in the history
  • Loading branch information
hsheth2 committed Aug 27, 2024
1 parent c81d7cd commit 2b8cbd8
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 11 deletions.
22 changes: 15 additions & 7 deletions .github/actions/docker-custom-build-and-push/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ inputs:
description: "Main tag to use for the Docker image"
required: true
flavor:
description: 'Image flavor (e.g., slim, full)'
description: "Image flavor (e.g., slim, full)"
required: false
target:
description: "Sets the target stage to build"
Expand Down Expand Up @@ -61,7 +61,7 @@ runs:
# Code for testing the build when not pushing to Docker Hub.
- name: Build and Load image for testing (if not publishing)
uses: docker/build-push-action@v5
uses: docker/build-push-action@v6
if: ${{ inputs.publish != 'true' }}
with:
context: ${{ inputs.context }}
Expand All @@ -74,8 +74,12 @@ runs:
target: ${{ inputs.target }}
load: true
push: false
cache-from: type=registry,ref=${{ steps.docker_meta.outputs.tags }}
cache-to: type=inline
cache-from: |
type=gha
type=registry,ref=${{ steps.docker_meta.outputs.tags }}
cache-to: |
type=gha
type=inline
- name: Single Tag
if: ${{ inputs.publish != 'true' }}
shell: bash
Expand Down Expand Up @@ -108,7 +112,7 @@ runs:
username: ${{ inputs.username }}
password: ${{ inputs.password }}
- name: Build and Push Multi-Platform image
uses: docker/build-push-action@v5
uses: docker/build-push-action@v6
if: ${{ inputs.publish == 'true' }}
with:
context: ${{ inputs.context }}
Expand All @@ -118,7 +122,11 @@ runs:
tags: ${{ steps.docker_meta.outputs.tags }}
target: ${{ inputs.target }}
push: true
cache-from: type=registry,ref=${{ steps.docker_meta.outputs.tags }}
cache-to: type=inline
cache-from: |
type=gha
type=registry,ref=${{ steps.docker_meta.outputs.tags }}
cache-to: |
type=gha
type=inline
# TODO add code for vuln scanning?
7 changes: 7 additions & 0 deletions docker/datahub-ingestion-base/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,13 @@ ENV PATH="${VIRTUAL_ENV}/bin:$PATH"
RUN python3 -m venv $VIRTUAL_ENV && \
uv pip install --no-cache --upgrade pip setuptools wheel

# Note: Normally uv will create hardlinks from the cache directory to the venv.
# In our Dockerfiles, we normally use `RUN --mount=type=cache,... uv pip install ...`,
# which means the cache directory is on a separate filesystem. Hardlinking then fails
# and uv emits the following warning:
#   Failed to hardlink files; falling back to full copy. This may lead to degraded performance.
#   If the cache and target directories are on different filesystems, hardlinking may not be supported.
#   If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning.
# The copy fallback is intentional here, so prefix such commands with `UV_LINK_MODE=copy`
# to suppress the warning.

ENTRYPOINT [ "/entrypoint.sh" ]

FROM ${BASE_IMAGE} AS full-install
Expand Down
4 changes: 2 additions & 2 deletions docker/datahub-ingestion/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ RUN sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEAS
FROM base AS slim-install

RUN --mount=type=cache,target=/datahub-ingestion/.cache/uv,uid=1000,gid=1000 \
uv pip install -e ".[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,hive,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary]"
UV_LINK_MODE=copy uv pip install -e ".[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,hive,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary]"

FROM base AS full-install-build

Expand All @@ -42,7 +42,7 @@ USER datahub
COPY ./docker/datahub-ingestion/pyspark_jars.sh .

RUN --mount=type=cache,target=/datahub-ingestion/.cache/uv,uid=1000,gid=1000 \
uv pip install -e ".[base,all]" "./airflow-plugin[plugin-v2]" && \
UV_LINK_MODE=copy uv pip install -e ".[base,all]" "./airflow-plugin[plugin-v2]" && \
./pyspark_jars.sh && \
datahub --version

Expand Down
2 changes: 1 addition & 1 deletion docker/datahub-ingestion/Dockerfile-slim-only
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ RUN sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEAS
FROM base as slim-install

RUN --mount=type=cache,target=/datahub-ingestion/.cache/uv,uid=1000,gid=1000 \
uv pip install -e ".[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,hive,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary]" && \
UV_LINK_MODE=copy uv pip install -e ".[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,hive,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary]" && \
datahub --version

FROM slim-install as final
Expand Down
2 changes: 1 addition & 1 deletion docker/datahub-ingestion/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ ext {
docker_target = project.getProperties().getOrDefault("dockerTarget", "slim")
docker_version = "${version}${docker_target == 'slim' ? '-slim' : ''}"

revision = 6 // increment to trigger rebuild
revision = 7 // increment to trigger rebuild
}

dependencies {
Expand Down

0 comments on commit 2b8cbd8

Please sign in to comment.