Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add initial upgrade testing for Data Science Pipelines #1869

Merged
merged 12 commits into from
Oct 1, 2024
Merged
6 changes: 6 additions & 0 deletions ods_ci/libs/DataSciencePipelinesKfp.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,12 @@ def wait_for_run_completion(self, run_id, timeout=160, sleep_duration=5):
response = self.client.wait_for_run_completion(run_id=run_id, timeout=timeout, sleep_duration=sleep_duration)
return response.state

@keyword
def get_run_status(self, run_id: str):
    """Return the current state of a pipeline run without waiting.

    Unlike ``wait_for_run_completion`` above, this performs a single
    ``get_run`` call against the KFP client and immediately returns the
    run's state as reported by the API (e.g. "RUNNING", "SUCCEEDED").

    ``self.client`` must have been initialised beforehand (see the
    Setup Client keyword in this library).
    """
    response = self.client.get_run(run_id)
    return response.state

@keyword
def check_run_status(self, run_id, timeout=160):
"""Waits for a run to complete"""
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
*** Settings ***
Documentation Collection of keywords to interact with Data Science Pipelines via CLI
Library OperatingSystem
Library String
Library ../../../../libs/DataSciencePipelinesAPI.py
Library ../../../../libs/DataSciencePipelinesKfp.py
Resource ../../../Resources/OCP.resource
Resource ../../../Resources/Common.robot


*** Variables ***
${DSPA_PATH}= tests/Resources/Files/pipeline-samples/v2/dspa
Expand Down Expand Up @@ -42,10 +43,16 @@ Create Pipeline Server
... -p OBJECT_STORAGE_REGION=${object_storage_region}
... -p OBJECT_STORAGE_BUCKET=${object_storage_bucket_name}

Run oc process -f ${DSPA_PATH}/dspa-template.yaml ${template_parameters} | oc apply -n ${namespace} -f -
Run And Verify Command oc process -f ${DSPA_PATH}/dspa-template.yaml ${template_parameters} | oc apply -n ${namespace} -f - # robocop: off=line-too-long

IF ${configure_pip_index} Create Pipelines ConfigMap With Custom Pip Index Url And Trusted Host ${namespace}

Get DSP Version
    [Documentation]    Returns the spec.dspVersion (e.g. "v2") of the first
    ...    DataSciencePipelinesApplication deployed in ${namespace}
    [Arguments]    ${namespace}
    # Requires jq on the PATH; the keyword fails if the oc command exits non-zero.
    ${dsp_version}=    Run And Verify Command    oc get datasciencepipelinesapplications -n ${namespace} -o json | jq -r '.items[0].spec.dspVersion'    # robocop: off=line-too-long
    RETURN    ${dsp_version}

# robocop: disable:line-too-long
Create PipelineServer Using Custom DSPA
[Documentation] Install and verifies that DataSciencePipelinesApplication CRD is installed and working
Expand All @@ -55,21 +62,25 @@ Create PipelineServer Using Custom DSPA
[Arguments] ${namespace} ${dspa_file}=data-science-pipelines-sample.yaml
... ${assert_install}=${TRUE} ${configure_pip_index}=${TRUE}

Run oc apply -f "${DSPA_PATH}/${dspa_file}" -n ${namespace}
Run And Verify Command oc apply -f "${DSPA_PATH}/${dspa_file}" -n ${namespace}
IF ${assert_install}==True
${generation_value} Run oc get datasciencepipelinesapplications -n ${namespace} -o json | jq '.items[0].metadata.generation'
${generation_value}= Run And Verify Command oc get datasciencepipelinesapplications -n ${namespace} -o json | jq '.items[0].metadata.generation' # robocop: off=line-too-long
Should Be True ${generation_value} == 2 DataSciencePipelinesApplication created
END

IF ${configure_pip_index} Create Pipelines ConfigMap With Custom Pip Index Url And Trusted Host ${namespace}

Verify Pipeline Server Deployments # robocop: disable
[Documentation] Verifies the correct deployment of DS Pipelines in the rhods namespace
[Documentation] Verifies the correct deployment of a DSPv2 DataSciencePipelineApplication
[Arguments] ${namespace}

@{all_pods}= Oc Get kind=Pod namespace=${namespace}
... label_selector=component=data-science-pipelines
Run Keyword And Continue On Failure Length Should Be ${all_pods} 7

${pods_count}= Get Length ${all_pods}
IF ${pods_count} < 7
Fail DSPA requires at least 7 pods running in the namespace
END

@{pipeline_api_server}= Oc Get kind=Pod namespace=${namespace}
... label_selector=app=ds-pipeline-dspa
Expand Down Expand Up @@ -106,12 +117,52 @@ Verify Pipeline Server Deployments # robocop: disable
${containerNames}= Create List mariadb
Verify Deployment ${mariadb} 1 1 ${containerNames}

Verify DSPv1 Pipeline Server Deployments
    [Documentation]    Verifies the correct deployment of a DSPv1 DataSciencePipelineApplication
    [Arguments]    ${namespace}

    # A DSPv1 server is smaller than DSPv2 (which expects 7 pods): here the
    # minimum is api-server, persistenceagent, scheduledworkflow and mariadb.
    @{all_pods}=    Oc Get    kind=Pod    namespace=${namespace}
    ...    label_selector=component=data-science-pipelines

    ${pods_count}=    Get Length    ${all_pods}
    IF    ${pods_count} < 4
        Fail    DSPA requires at least 4 pods running in the namespace
    END

    # The API server pod runs two containers: the oauth proxy and the server itself.
    @{pipeline_api_server}=    Oc Get    kind=Pod    namespace=${namespace}
    ...    label_selector=app=ds-pipeline-dspa
    ${containerNames}=    Create List    oauth-proxy    ds-pipeline-api-server
    Verify Deployment    ${pipeline_api_server}    1    2    ${containerNames}

    @{pipeline_persistenceagent}=    Oc Get    kind=Pod    namespace=${namespace}
    ...    label_selector=app=ds-pipeline-persistenceagent-dspa
    ${containerNames}=    Create List    ds-pipeline-persistenceagent
    Verify Deployment    ${pipeline_persistenceagent}    1    1    ${containerNames}

    @{pipeline_scheduledworkflow}=    Oc Get    kind=Pod    namespace=${namespace}
    ...    label_selector=app=ds-pipeline-scheduledworkflow-dspa
    ${containerNames}=    Create List    ds-pipeline-scheduledworkflow
    Verify Deployment    ${pipeline_scheduledworkflow}    1    1    ${containerNames}

    @{mariadb}=    Oc Get    kind=Pod    namespace=${namespace}
    ...    label_selector=app=mariadb-dspa
    ${containerNames}=    Create List    mariadb
    Verify Deployment    ${mariadb}    1    1    ${containerNames}

Wait Until Pipeline Server Is Deployed
[Documentation] Waits until all the expected pods of the pipeline server
... are running
[Arguments] ${namespace}
Wait Until Keyword Succeeds 10 times 10s
... Verify Pipeline Server Deployments namespace=${namespace}

${dspVersion}= Get DSP Version ${namespace}
IF "${dspVersion}" == "v2"
Wait Until Keyword Succeeds 10 times 10s
... Verify Pipeline Server Deployments namespace=${namespace}
ELSE
Wait Until Keyword Succeeds 10 times 10s
... Verify DSPv1 Pipeline Server Deployments namespace=${namespace}
END


Wait Until Pipeline Server Is Deleted
[Documentation] Waits until all pipeline server pods are deleted
Expand All @@ -128,13 +179,13 @@ Create Pipelines ConfigMap With Custom Pip Index Url And Trusted Host
[Documentation] Creates a Configmap (ds-pipeline-custom-env-vars) in the project,
... storing the values for pip_index_url and pip_trusted_host
[Arguments] ${namespace}
Run oc create configmap ds-pipeline-custom-env-vars -n ${namespace} --from-literal=pip_index_url=${PIP_INDEX_URL} --from-literal=pip_trusted_host=${PIP_TRUSTED_HOST}
Run And Verify Command oc create configmap ds-pipeline-custom-env-vars -n ${namespace} --from-literal=pip_index_url=${PIP_INDEX_URL} --from-literal=pip_trusted_host=${PIP_TRUSTED_HOST} # robocop: off=line-too-long

Create Secret With Pipelines Object Storage Information
[Documentation] Creates a secret needed to create a pipeline server containing the object storage credentials
[Arguments] ${namespace} ${object_storage_access_key} ${object_storage_secret_key}
Run oc create secret generic dashboard-dspa-secret -n ${namespace} --from-literal=AWS_ACCESS_KEY_ID=${object_storage_access_key} --from-literal=AWS_SECRET_ACCESS_KEY=${object_storage_secret_key}
Run oc label secret dashboard-dspa-secret -n ${namespace} opendatahub.io/dashboard=true
Run And Verify Command oc create secret generic dashboard-dspa-secret -n ${namespace} --from-literal=AWS_ACCESS_KEY_ID=${object_storage_access_key} --from-literal=AWS_SECRET_ACCESS_KEY=${object_storage_secret_key} # robocop: off=line-too-long
Run And Verify Command oc label secret dashboard-dspa-secret -n ${namespace} opendatahub.io/dashboard=true


Import Pipeline And Create Run
Expand Down Expand Up @@ -164,6 +215,20 @@ Import Pipeline And Create Run

RETURN ${pipeline_id} ${pipeline_version_id} ${pipeline_run_id} ${experiment_id}

Verify Run Status
    [Documentation]    Verifies that the current status of pipeline run ${pipeline_run_id}
    ...    matches ${pipeline_run_expected_status}, failing otherwise.
    ...    Checks the status once, without waiting — use
    ...    Wait For Run Completion And Verify Status to poll until the run finishes.
    # NOTE: the default must be SUCCEEDED *without* quotes. In Robot Framework
    # the quotes of a default value become part of the value, so the quoted
    # comparison in the IF below would double-quote and never match.
    [Arguments]    ${namespace}    ${username}    ${password}
    ...    ${pipeline_run_id}    ${pipeline_run_expected_status}=SUCCEEDED

    DataSciencePipelinesKfp.Setup Client    user=${username}    pwd=${password}    project=${namespace}

    ${pipeline_run_status}=    DataSciencePipelinesKfp.Get Run Status    run_id=${pipeline_run_id}
    IF    "${pipeline_run_status}" != "${pipeline_run_expected_status}"
        ${error_msg}=    Catenate    Expected pipeline status was ${pipeline_run_expected_status} but pipeline run
        ...    has status=${pipeline_run_status}
        Fail    ${error_msg}
    END

Wait For Run Completion And Verify Status
[Documentation]
[Arguments] ${namespace} ${username} ${password}
Expand All @@ -175,11 +240,8 @@ Wait For Run Completion And Verify Status
${pipeline_run_status}= DataSciencePipelinesKfp.Wait For Run Completion run_id=${pipeline_run_id}
... timeout=${pipeline_run_timeout} sleep_duration=${5}

IF "${pipeline_run_status}" != "${pipeline_run_expected_status}"
${error_msg}= Catenate Expected pipeline status was ${pipeline_run_expected_status} but pipeline run
... finished with status=${pipeline_run_status}
Fail ${error_msg}
END
Verify Run Status namespace=${namespace} username=${username} password=${password}
... pipeline_run_id=${pipeline_run_id} pipeline_run_expected_status=${pipeline_run_expected_status}

RETURN ${pipeline_run_status}

Expand Down Expand Up @@ -207,5 +269,3 @@ Delete Pipeline And Related Resources
END

DataSciencePipelinesKfp.Delete Pipeline ${pipeline_id}


Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
*** Settings ***
Documentation Upgrade Testing Keywords
Resource DataSciencePipelinesBackend.resource
Resource ../../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/Projects.resource


*** Variables ***
${PROJECT}= dsp-upgrade-testing
${PIPELINE_LONGRUNNING_FILEPATH}= tests/Resources/Files/pipeline-samples/v2/pip_index_url/take_nap_pipeline_root_compiled.yaml # robocop: disable:line-too-long


*** Keywords ***
Setup Environment For Upgrade Testing
    [Documentation]    Creates project ${PROJECT} and sets up the resources to test during upgrade:
    ...    - Creates a pipeline server
    ...    - Starts a pipeline that will run for 1h

    Create Project And Configure Pipeline Server    ${PROJECT}
    Start Long Running Pipeline    ${PROJECT}

Verify Resources After Upgrade
    [Documentation]    Verifies the status of the resources created in ${PROJECT} after the upgrade
    ...    Deletes ${PROJECT} if all verifications are correct (leaving it for debugging purposes if not)

    DataSciencePipelinesBackend.Wait Until Pipeline Server Is Deployed    namespace=${PROJECT}

    # The 1h pipeline started before the upgrade must still be RUNNING,
    # i.e. in-flight runs are expected to survive the upgrade.
    Verify Run Status
    ...    namespace=${PROJECT}    username=${TEST_USER.USERNAME}    password=${TEST_USER.PASSWORD}
    ...    pipeline_run_id=${DSP_LONGRUNNING_PIPELINE_RUN_ID}    pipeline_run_expected_status=RUNNING

    # Only reached when all checks above passed: a failure leaves the project in place.
    Projects.Delete Project Via CLI By Display Name    ${PROJECT}

Create Project And Configure Pipeline Server
    [Documentation]    Creates a data science pipelines project ${namespace} (deleting existing one if needed),
    ...    configures a pipeline server using the default configuration and waits until the server is running
    [Arguments]    ${namespace}
    # Delete any leftover project first so the setup always starts from scratch.
    Projects.Delete Project Via CLI By Display Name    ${namespace}
    Projects.Create Data Science Project From CLI    ${namespace}
    # Object storage credentials and bucket come from the S3 test configuration.
    DataSciencePipelinesBackend.Create Pipeline Server    namespace=${namespace}
    ...    object_storage_access_key=${S3.AWS_ACCESS_KEY_ID}
    ...    object_storage_secret_key=${S3.AWS_SECRET_ACCESS_KEY}
    ...    object_storage_endpoint=${S3.BUCKET_2.ENDPOINT}
    ...    object_storage_region=${S3.BUCKET_2.REGION}
    ...    object_storage_bucket_name=${S3.BUCKET_2.NAME}
    ...    dsp_version=v2
    DataSciencePipelinesBackend.Wait Until Pipeline Server Is Deployed    namespace=${namespace}

Start Long Running Pipeline
    [Documentation]    Imports the take-nap pipeline and starts a run that sleeps for 1h,
    ...    storing the run id in the global variable ${DSP_LONGRUNNING_PIPELINE_RUN_ID}
    [Arguments]    ${namespace}

    # naptime_secs=3600 keeps the run in RUNNING state for 1h — long enough
    # to span the upgrade window.
    ${pipeline_run_params}=    Create Dictionary    naptime_secs=${3600}

    # robocop:off=unused-variable
    ${pipeline_id}    ${pipeline_version_id}    ${pipeline_run_id}    ${experiment_id}=
    ...    DataSciencePipelinesBackend.Import Pipeline And Create Run
    ...    namespace=${namespace}    username=${TEST_USER.USERNAME}    password=${TEST_USER.PASSWORD}
    ...    pipeline_name=take-nap
    ...    pipeline_description=A pipeline that runs for 1h and prints a message
    ...    pipeline_package_path=${PIPELINE_LONGRUNNING_FILEPATH}
    ...    pipeline_run_name=take-nap-run
    ...    pipeline_run_params=${pipeline_run_params}

    # Exported globally so Verify Resources After Upgrade can check the run later.
    Set Global Variable    ${DSP_LONGRUNNING_PIPELINE_RUN_ID}    ${pipeline_run_id}
8 changes: 4 additions & 4 deletions ods_ci/tests/Resources/Common.robot
Original file line number Diff line number Diff line change
Expand Up @@ -437,13 +437,13 @@ Extract URLs From Text
RETURN ${urls}

Run And Verify Command
[Documentation] Run and verify shell command
[Arguments] ${command} ${print_to_log}=${TRUE}
[Documentation] Run and verify shell command
[Arguments] ${command} ${print_to_log}=${TRUE} ${expected_rc}=${0}
${result}= Run Process ${command} shell=yes stderr=STDOUT
IF ${print_to_log} Log ${result.stdout} console=True
Should Be True ${result.rc} == 0
Should Be True ${result.rc} == ${expected_rc}
RETURN ${result.stdout}

Run And Watch Command
[Documentation] Run any shell command (including args) with optional:
... Timeout: 10 minutes by default.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,12 @@ def add_gpu_toleration(task: PipelineTask, accelerator_type: str, accelerator_li
kubernetes.add_toleration(task, key=accelerator_type, operator="Exists", effect="NoSchedule")


@dsl.component(base_image=common_base_image, packages_to_install=["torch"], pip_index_urls=["$PIP_INDEX_URL"])
@dsl.component(
base_image=common_base_image,
packages_to_install=["torch"],
pip_index_urls=["$PIP_INDEX_URL"],
pip_trusted_hosts=["$PIP_TRUSTED_HOST"],
)
def verify_gpu_availability(gpu_toleration_added: bool):
import torch # noqa: PLC0415

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ deploymentSpec:
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location --index-url $PIP_INDEX_URL\
\ --trusted-host $PIP_INDEX_URL 'kfp==2.9.0' '--no-deps' 'typing-extensions>=3.7.4,<5;\
\ --trusted-host $PIP_TRUSTED_HOST 'kfp==2.9.0' '--no-deps' 'typing-extensions>=3.7.4,<5;\
\ python_version<\"3.9\"' && python3 -m pip install --quiet --no-warn-script-location\
\ --index-url $PIP_INDEX_URL --trusted-host $PIP_INDEX_URL 'torch' && \"\
$0\" \"$@\"\n"
\ --index-url $PIP_INDEX_URL --trusted-host $PIP_TRUSTED_HOST 'torch' &&\
\ \"$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
Expand Down Expand Up @@ -72,10 +72,10 @@ deploymentSpec:
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location --index-url $PIP_INDEX_URL\
\ --trusted-host $PIP_INDEX_URL 'kfp==2.9.0' '--no-deps' 'typing-extensions>=3.7.4,<5;\
\ --trusted-host $PIP_TRUSTED_HOST 'kfp==2.9.0' '--no-deps' 'typing-extensions>=3.7.4,<5;\
\ python_version<\"3.9\"' && python3 -m pip install --quiet --no-warn-script-location\
\ --index-url $PIP_INDEX_URL --trusted-host $PIP_INDEX_URL 'torch' && \"\
$0\" \"$@\"\n"
\ --index-url $PIP_INDEX_URL --trusted-host $PIP_TRUSTED_HOST 'torch' &&\
\ \"$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from kfp import compiler, dsl, kubernetes
from kfp.dsl import PipelineTask

common_base_image = (
"registry.redhat.io/ubi8/python-39@sha256:3523b184212e1f2243e76d8094ab52b01ea3015471471290d011625e1763af61"
)


def add_pip_index_configuration(task: PipelineTask):
    """Expose the custom pip index configuration to *task*.

    Maps the ``ds-pipeline-custom-env-vars`` ConfigMap entries
    ``pip_index_url`` and ``pip_trusted_host`` onto the PIP_INDEX_URL and
    PIP_TRUSTED_HOST environment variables of the task's pod.
    """
    kubernetes.use_config_map_as_env(
        task,
        config_map_name="ds-pipeline-custom-env-vars",
        config_map_key_to_env={"pip_index_url": "PIP_INDEX_URL", "pip_trusted_host": "PIP_TRUSTED_HOST"},
    )


@dsl.component(base_image=common_base_image)
def take_nap(naptime_secs: int) -> str:
    """Sleep for ``naptime_secs`` seconds, then report back awake."""
    # KFP components must import their dependencies inside the function body.
    import time  # noqa: PLC0415

    print(f"Sleeping for {naptime_secs} seconds: Zzzzzz ...")
    time.sleep(naptime_secs)
    return "I'm awake now. Did I snore?"


@dsl.component(base_image=common_base_image)
def wake_up(message: str):
    """Wakes up from nap printing a message.

    *message* is wired to the output of the take_nap step in the pipeline
    below, so this step only runs once the nap has completed.
    """
    print(message)


@dsl.pipeline(name="take-nap-pipeline", description="Pipeline that sleeps for 15 mins (900 secs)")
def take_nap_pipeline(naptime_secs: int = 900):
    """Two-step pipeline: take_nap sleeps, then wake_up prints its output."""
    # Caching is disabled on both steps so every run actually sleeps.
    nap_task = take_nap(naptime_secs=naptime_secs)
    nap_task.set_caching_options(False)
    add_pip_index_configuration(nap_task)

    wake_task = wake_up(message=nap_task.output)
    wake_task.set_caching_options(False)
    add_pip_index_configuration(wake_task)


if __name__ == "__main__":
    # Compile the pipeline to a YAML spec next to this script, replacing the
    # ".py" suffix with "_compiled.yaml".
    compiler.Compiler().compile(take_nap_pipeline, package_path=__file__.replace(".py", "_compiled.yaml"))
Loading
Loading