# Skip to content
#
# add base workflow for running benchmarks on demand (part2) #924
#
# add base workflow for running benchmarks on demand (part2)
#
# add base workflow for running benchmarks on demand (part2) #924
#
# Workflow file for this run

name: RAGStack CI

# Triggers: manual dispatch, a cron schedule (minute 0 of every 6th hour),
# and pull requests that target `main`.
on:
  workflow_dispatch: {}
  schedule:
    - cron: '0 */6 * * *'
  pull_request:
    branches:
      - main
    # Pull requests whose changes are confined to these paths do not start
    # this workflow.
    # NOTE(review): `scripts/**` is ignored here, yet the e2e job sources
    # scripts/ci-common-env.sh - confirm that skipping CI for script-only
    # changes is intended.
    paths-ignore:
      - '.github/workflows/langchain-master-daily.yml'
      - '.github/workflows/llamaindex-main-daily.yml'
      - '.github/workflows/release-ragstack.yml'
      - '.github/workflows/security-scan.yml'
      - 'scripts/**'
      - 'docs/**'
      - 'README.adoc'
      - 'PACKAGE_README.md'
# Runs are grouped by workflow name and git ref; starting a new run in a
# group cancels the run of that group that is still in progress.
concurrency:
  cancel-in-progress: true
  group: ragstack-ci-${{ github.workflow }}-${{ github.ref }}
jobs:
  # Runs dorny/paths-filter with the rules in .github/changes-filter.yaml and
  # re-exports its `notebooks` and `e2e_tests` outputs for downstream jobs.
  preconditions:
    name: Preconditions
    runs-on: ubuntu-latest
    outputs:
      notebooks: ${{ steps.filter.outputs.notebooks }}
      e2e_tests: ${{ steps.filter.outputs.e2e_tests }}
    steps:
      - uses: actions/checkout@v3
      - uses: dorny/paths-filter@v2
        id: filter
        with:
          filters: ./.github/changes-filter.yaml

  # Builds the RAGStack Docker image without pushing it, runs the image scan
  # action, then builds the two example Dockerfiles under docker/examples.
  build-docker:
    name: Build Docker image
    needs: ["preconditions"]
    runs-on: ubuntu-latest
    steps:
      - name: Check out the repo
        uses: actions/checkout@v3
      - name: Build docker image
        uses: ./.github/actions/build-deploy-docker-image
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          # we don't really care about the version here, we just want to make sure the image builds
          ragstack-version: "0.*"
          docker-tag: latest-dev
          push: "false"
      - name: Scan docker image
        uses: ./.github/actions/scan-docker-image
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          # NOTE(review): the build step above is given tag `latest-dev` (with
          # push disabled) while this step is given `latest` - confirm that the
          # scan really targets the image that was just built.
          docker-tag: "latest"
          snyk-token: ${{ secrets.SNYK_TOKEN }}
      - name: Docker examples - basic
        run: |
          docker --version
          cd docker/examples/basic
          sudo docker build -t ragstack-basic .
      - name: Docker examples - multistage
        run: |
          docker --version
          cd docker/examples/multistage
          sudo docker build -t ragstack-multistage .

  # Installs tox on Python 3.11 and runs the default tox environments.
  unit-tests:
    name: Unit Tests
    runs-on: ubuntu-latest
    steps:
      - name: Check out the repo
        uses: actions/checkout@v3
      - name: "Setup: Python 3.11"
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"
          cache: "pip"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install tox
      - name: Run ragstack-ai unit tests
        run: |
          tox

  # Runs the notebook and end-to-end suites once per matrix entry. The job is
  # started when the path filter reports e2e or notebook changes, and always
  # for scheduled and manually dispatched runs.
  e2e-tests:
    name: End-to-end Tests (${{ matrix.name }})
    needs: ["preconditions"]
    if: needs.preconditions.outputs.e2e_tests == 'true' || needs.preconditions.outputs.notebooks == 'true' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-latest
    strategy:
      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false
      matrix:
        include:
          # Astra PROD is currently disabled. The template below follows the
          # same schema as the live entries: the backend goes in `type` (read
          # as matrix.type by the E2E step) and the flags are strings, because
          # the step conditions compare them with 'true' and a YAML boolean
          # does not compare equal to that string in GitHub expressions.
          # - name: Astra PROD
          #   type: "astradb"
          #   needs_astra_setup: "true"
          #   astra_token_secret: E2E_TESTS_ASTRA_PROD_DB_TOKEN
          #   env: PROD
          #   region: eu-west-1
          #   cloud: aws
          #   run_notebooks: "true"
          #   testspace_space: ""
          - name: Astra DEV
            type: "astradb"
            needs_astra_setup: "true"
            astra_token_secret: E2E_TESTS_ASTRA_DEV_DB_TOKEN
            env: DEV
            region: us-west-2
            cloud: aws
            run_notebooks: "false"
            testspace_space: "RAGStack test suite - RAGStack dev - AstraDB"
          - name: DSE
            type: "local-cassandra"
            needs_astra_setup: "false"
            astra_token_secret: ""
            env: ""
            region: ""
            cloud: ""
            run_notebooks: "false"
            testspace_space: "RAGStack test suite - RAGStack dev - DSE"
    steps:
      - name: Check out the repo
        uses: actions/checkout@v3
      - name: "Setup: Python 3.11"
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"
          cache: "pip"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install tox
      # Only matrix entries with needs_astra_setup == "true" get a database;
      # it is named after the workflow run id.
      - name: Setup AstraDB
        uses: ./.github/actions/setup-astra-db
        if: matrix.needs_astra_setup == 'true'
        id: astra-db
        with:
          astra-token: ${{ secrets[matrix.astra_token_secret] }}
          db-name: ${{ github.run_id }}
          env: ${{ matrix.env }}
          region: ${{ matrix.region }}
          cloud: ${{ matrix.cloud }}
      - name: Run notebook tests
        # We run notebook tests only on the PROD env since CassIO needs different
        # parameters for DEV, and we don't want to dirty the notebooks.
        if: matrix.run_notebooks == 'true' && (needs.preconditions.outputs.notebooks == 'true' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
        env:
          ASTRA_DB_APPLICATION_TOKEN: "${{ secrets[matrix.astra_token_secret] }}"
          ASTRA_DB_API_ENDPOINT: "${{ steps.astra-db.outputs.db_endpoint }}"
          ASTRA_DB_ID: "${{ steps.astra-db.outputs.db_id }}"
          OPENAI_API_KEY: "${{ secrets.E2E_TESTS_OPEN_AI_KEY }}"
          LANGCHAIN_API_KEY: "${{ secrets.E2E_TESTS_LANGCHAIN_API_KEY }}"
          GCLOUD_ACCOUNT_KEY_JSON: "${{ secrets.E2E_TESTS_GCLOUD_ACCOUNT_KEY_JSON }}"
        run: |
          source scripts/ci-common-env.sh
          tox -e notebooks
      - name: Run E2E tests
        id: e2e-tests
        if: needs.preconditions.outputs.e2e_tests == 'true' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
        env:
          VECTOR_DATABASE_TYPE: "${{ matrix.type }}"
          ASTRA_DB_TOKEN: "${{ secrets[matrix.astra_token_secret] }}"
          ASTRA_DB_ENDPOINT: "${{ steps.astra-db.outputs.db_endpoint }}"
          ASTRA_DB_ID: "${{ steps.astra-db.outputs.db_id }}"
          ASTRA_DB_ENV: "${{ matrix.env }}"
          OPEN_AI_KEY: "${{ secrets.E2E_TESTS_OPEN_AI_KEY }}"
          AZURE_OPEN_AI_KEY: "${{ secrets.E2E_TESTS_AZURE_OPEN_AI_KEY }}"
          AZURE_OPEN_AI_ENDPOINT: "${{ secrets.E2E_TESTS_AZURE_OPEN_AI_ENDPOINT }}"
          AZURE_BLOB_STORAGE_CONNECTION_STRING: "${{ secrets.E2E_TESTS_AZURE_BLOB_STORAGE_CONNECTION_STRING }}"
          GCLOUD_ACCOUNT_KEY_JSON: "${{ secrets.E2E_TESTS_GCLOUD_ACCOUNT_KEY_JSON }}"
          GOOGLE_API_KEY: "${{ secrets.E2E_TESTS_GOOGLE_API_KEY }}"
          AWS_ACCESS_KEY_ID: "${{ secrets.E2E_TESTS_AWS_ACCESS_KEY_ID }}"
          AWS_SECRET_ACCESS_KEY: "${{ secrets.E2E_TESTS_AWS_SECRET_ACCESS_KEY }}"
          BEDROCK_AWS_REGION: "${{ secrets.E2E_TESTS_BEDROCK_AWS_REGION }}"
          HUGGINGFACE_HUB_KEY: "${{ secrets.E2E_TESTS_HUGGINGFACE_HUB_KEY }}"
          NVIDIA_API_KEY: "${{ secrets.E2E_TESTS_NVIDIA_API_KEY }}"
          LANGCHAIN_API_KEY: "${{ secrets.E2E_TESTS_LANGCHAIN_API_KEY }}"
        run: |
          source scripts/ci-common-env.sh
          tox -c ragstack-e2e-tests
      # The reporting and cleanup steps below use always() so they still run
      # after a failed test step.
      - name: Dump report on Github Summary
        if: always()
        uses: ./.github/actions/add-report-to-github-summary
        with:
          all: true
      # Slack and Testspace reporting is limited to scheduled and manually
      # dispatched runs.
      - name: Compute commit URL
        if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
        id: commit-ref
        # The output file path is quoted so the redirect is safe against word splitting.
        run: echo "commit-ref=$(git rev-parse --short HEAD)" >> "$GITHUB_OUTPUT"
      - name: Prepare report for Slack
        if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
        uses: ./.github/actions/generate-slack-report
        with:
          from-report-file: ragstack-e2e-tests/failed-tests-report.txt
          output-file: slack-report.json
          type: "RAGStack Tests"
          outcome: ${{ steps.e2e-tests.outcome }}
          commit-url: "https://github.com/datastax/ragstack-ai/commits/${{ steps.commit-ref.outputs.commit-ref }}"
      - name: Dump report on Slack
        if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
        # NOTE(review): this reference had been mangled into an e-mail
        # placeholder and was not a valid action reference. The action name is
        # restored; the pinned version is a best guess - confirm against the
        # repository history.
        uses: slackapi/slack-github-action@v1.24.0
        with:
          payload-file-path: "./slack-report.json"
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
      - name: Testspace deploy report
        uses: ./.github/actions/deploy-testspace-report
        if: always() && matrix.testspace_space != '' && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
        with:
          token: ${{ secrets.TESTSPACE_TOKEN }}
          report-file: ragstack-e2e-tests/results.xml
          space: ${{ matrix.testspace_space }}
      # Best-effort cleanup of the database requested by "Setup AstraDB":
      # a failure here does not fail the job.
      - name: Cleanup AstraDB
        uses: ./.github/actions/cleanup-astra-db
        if: always() && matrix.needs_astra_setup == 'true'
        continue-on-error: true
        with:
          astra-token: ${{ secrets[matrix.astra_token_secret] }}
          db-name: ${{ github.run_id }}
          env: ${{ matrix.env }}