diff --git a/ods_ci/tests/Resources/Page/DistributedWorkloads/DistributedWorkloads.resource b/ods_ci/tests/Resources/Page/DistributedWorkloads/DistributedWorkloads.resource
index 3d3f97a40..33d54634c 100644
--- a/ods_ci/tests/Resources/Page/DistributedWorkloads/DistributedWorkloads.resource
+++ b/ods_ci/tests/Resources/Page/DistributedWorkloads/DistributedWorkloads.resource
@@ -9,6 +9,10 @@ ${CODEFLARE_TEST_TIMEOUT_LONG} 20m
 ${GO_TEST_TIMEOUT}    1h
 ${JOB_GO_BIN}    %{WORKSPACE=.}/go-bin
 ${GO_JUNIT_REPORT_TOOL}    github.com/jstemmer/go-junit-report/v2@latest
+${VIRTUAL_ENV_NAME}    venv3.9
+${CODEFLARE-SDK-API_URL}    %{CODEFLARE-SDK-API_URL=https://api.github.com/repos/project-codeflare/codeflare-sdk/releases/latest}
+${CODEFLARE-SDK_DIR}    codeflare-sdk-upgrade
+${CODEFLARE-SDK_REPO_URL}    %{CODEFLARE-SDK_REPO_URL=https://github.com/project-codeflare/codeflare-sdk.git}
 
 
 *** Keywords ***
@@ -53,3 +57,55 @@ Convert Go Test Results To Junit
     IF    ${result.rc} != 0
         FAIL    Failed to convert Go test results to Junit
     END
+
+Run Codeflare Upgrade Tests
+    [Documentation]    Clone codeflare-sdk at its latest release tag and run the given upgrade test with pytest
+    [Arguments]    ${TEST_NAME}
+    ${latest_tag} =    Run Process    curl -s "${CODEFLARE-SDK-API_URL}" | grep '"tag_name":' | cut -d '"' -f 4
+    ...    shell=True    stderr=STDOUT
+    Log To Console    codeflare-sdk latest tag is : ${latest_tag.stdout}
+    # The pipeline rc is the rc of `cut`, so a failed curl/grep only shows up as an empty tag
+    IF    ${latest_tag.rc} != 0 or not $latest_tag.stdout.strip()
+        FAIL    Unable to fetch codeflare-sdk latest tag
+    END
+
+    Clone Git Repository    ${CODEFLARE-SDK_REPO_URL}    ${latest_tag.stdout}    ${CODEFLARE-SDK_DIR}
+
+    ${result} =    Run Process    virtualenv -p python3.9 ${VIRTUAL_ENV_NAME}
+    ...    shell=true    stderr=STDOUT
+    Log To Console    ${result.stdout}
+    IF    ${result.rc} != 0
+        FAIL    Unable to setup Python virtual environment
+    END
+
+    # POSIX `.` instead of `source`: shell=true runs /bin/sh, which is not guaranteed to be bash
+    ${result} =    Run Process    . ${VIRTUAL_ENV_NAME}/bin/activate && cd ${CODEFLARE-SDK_DIR} && poetry env use 3.9 && poetry install --with test,docs && poetry run pytest -v -s ./tests/upgrade/raycluster_sdk_upgrade_test.py::${TEST_NAME} --timeout\=300 && deactivate
+    ...    shell=true
+    ...    stderr=STDOUT
+    Log To Console    ${result.stdout}
+    IF    ${result.rc} != 0
+        FAIL    Running test ${TEST_NAME} failed
+    END
+
+Codeflare Upgrade Tests Teardown
+    [Documentation]    Remove the virtualenv and the codeflare-sdk checkout; if the project was created, delete it and its queue resources
+    [Arguments]    ${project_name}    ${project_created}
+    ${result} =    Run Process    rm -rf ${VIRTUAL_ENV_NAME}
+    ...    shell=true    stderr=STDOUT
+    Log To Console    ${result.stdout}
+    IF    ${result.rc} != 0
+        FAIL    Unable to cleanup Python virtual environment
+    END
+
+    ${result} =    Run Process    rm -rf ${CODEFLARE-SDK_DIR}
+    ...    shell=true    stderr=STDOUT
+    Log To Console    ${result.stdout}
+    IF    ${result.rc} != 0
+        FAIL    Unable to cleanup directory ${CODEFLARE-SDK_DIR}
+    END
+
+    IF    ${project_created} == True    Run Keywords
+    ...    Run    oc delete project ${project_name}    AND
+    ...    Run Process    oc delete LocalQueue local-queue-mnist -n ${project_name} &
+    ...    oc delete ClusterQueue cluster-queue-mnist &
+    ...    oc delete ResourceFlavor default-flavor-mnist    shell=True
diff --git a/ods_ci/tests/Resources/Page/DistributedWorkloads/WorkloadMetricsUI.resource b/ods_ci/tests/Resources/Page/DistributedWorkloads/WorkloadMetricsUI.resource
index 1d0f95de9..728ccfcc6 100644
--- a/ods_ci/tests/Resources/Page/DistributedWorkloads/WorkloadMetricsUI.resource
+++ b/ods_ci/tests/Resources/Page/DistributedWorkloads/WorkloadMetricsUI.resource
@@ -125,10 +125,14 @@ Get Memory Requested
         ${memory_requested}=    Replace String    ${output}    Mi    ${EMPTY}
         ${memory_requested_num}=    Convert To Number    ${memory_requested}
         ${memory_requested_gb_value}=    Evaluate    round( ${memory_requested_num} / 1024, 3)
-    ELSE
+    ELSE IF    "${workload_type}" == "RayCluster"
         ${memory_requested}=    Replace String    ${output}    Ki    ${EMPTY}
         ${memory_requested_num}=    Convert To Number    ${memory_requested}
         ${memory_requested_gb_value}=    Evaluate    round( ${memory_requested_num} / (1024*1024), 3)
+    ELSE
+        # No unit suffix is stripped here; assumes ${output} is a plain byte count - TODO confirm
+        ${memory_requested_num}=    Convert To Number    ${output}
+        ${memory_requested_gb_value}=    Evaluate    round( ${memory_requested_num} / (1024*1024*1024), 3)
     END
     RETURN    ${memory_requested_gb_value}
 
diff --git a/ods_ci/tests/Tests/100__deploy/120__upgrades/120__pre_upgrades.robot b/ods_ci/tests/Tests/100__deploy/120__upgrades/120__pre_upgrades.robot
index cf969a400..d8a361204 100644
--- a/ods_ci/tests/Tests/100__deploy/120__upgrades/120__pre_upgrades.robot
+++ b/ods_ci/tests/Tests/100__deploy/120__upgrades/120__pre_upgrades.robot
@@ -18,6 +18,8 @@ Resource ../../../Resources/Page/OCPDashboard/Pods/Pods.robot
 Resource    ../../../Resources/Page/OCPDashboard/Builds/Builds.robot
 Resource    ../../../Resources/Page/HybridCloudConsole/OCM.robot
 Resource    ../../../Resources/CLI/ModelServing/modelmesh.resource
+Resource    ../../../Resources/Page/DistributedWorkloads/DistributedWorkloads.resource
+Resource    ../../../Resources/Page/DistributedWorkloads/WorkloadMetricsUI.resource
 
 Suite Setup    Dashboard Suite Setup
 Suite Teardown    RHOSi Teardown
@@ -34,6 +36,8 @@ ${PRJ_DESCRIPTION}= project used for model serving tests
 ${MODEL_NAME}=    test-model
 ${MODEL_CREATED}=    ${FALSE}
 ${RUNTIME_NAME}=    Model Serving Test
+${DW_PROJECT_CREATED}=    False
+
 
 *** Test Cases ***
 Set PVC Size Via UI
@@ -119,6 +123,40 @@ Verify User Can Deploy Custom Runtime For Upgrade
     Page Should Contain Element    //tr[@id='caikit-runtime']
     [Teardown]    Dashboard Test Teardown
 
+Verify Distributed Workload Metrics Resources By Creating Ray Cluster Workload
+    [Documentation]    Creates the Ray Cluster and verify resource usage
+    [Tags]    Upgrade
+    ${PRJ_UPGRADE}    Set Variable    test-ns-rayupgrade
+    ${JOB_NAME}    Set Variable    mnist
+    Run Codeflare Upgrade Tests    TestMNISTRayClusterUp
+    Set Global Variable    ${DW_PROJECT_CREATED}    True
+    Launch Dashboard    ${TEST_USER.USERNAME}    ${TEST_USER.PASSWORD}    ${TEST_USER.AUTH_TYPE}
+    ...    ${ODH_DASHBOARD_URL}    ${BROWSER.NAME}    ${BROWSER.OPTIONS}
+    Open Distributed Workload Metrics Home Page
+    Select Distributed Workload Project By Name    ${PRJ_UPGRADE}
+    Select Refresh Interval    15 seconds
+    Wait Until Element Is Visible    ${DISTRIBUITED_WORKLOAD_RESOURCE_METRICS_TITLE_XP}    timeout=20
+    Wait Until Element Is Visible    xpath=//*[text()="Running"]    timeout=30
+
+    ${cpu_requested} =    Get CPU Requested    ${PRJ_UPGRADE}    local-queue-mnist
+    ${memory_requested} =    Get Memory Requested    ${PRJ_UPGRADE}    local-queue-mnist    Upgrade
+    Check Requested Resources Chart    ${PRJ_UPGRADE}    ${cpu_requested}    ${memory_requested}
+    Check Requested Resources    ${PRJ_UPGRADE}    ${CPU_SHARED_QUOTA}
+    ...    ${MEMEORY_SHARED_QUOTA}    ${cpu_requested}    ${memory_requested}    RayCluster
+
+    Check Distributed Workload Resource Metrics Status    ${JOB_NAME}    Running
+    Check Distributed Worklaod Status Overview    ${JOB_NAME}    Running
+    ...    All pods were ready or succeeded since the workload admission
+
+    Click Button    ${PROJECT_METRICS_TAB_XP}
+    Check Distributed Workload Resource Metrics Chart    ${PRJ_UPGRADE}    ${cpu_requested}
+    ...    ${memory_requested}    RayCluster    ${JOB_NAME}
+
+    # Cleanup runs only on failure; presumably a passing run leaves the Ray cluster for the post-upgrade suite
+    [Teardown]    Run Keyword If Test Failed
+    ...    Codeflare Upgrade Tests Teardown    ${PRJ_UPGRADE}    ${DW_PROJECT_CREATED}
+
+
 *** Keywords ***
 Dashboard Suite Setup
     [Documentation]    Basic suite setup
diff --git a/ods_ci/tests/Tests/100__deploy/120__upgrades/122__post_ugrade.robot b/ods_ci/tests/Tests/100__deploy/120__upgrades/122__post_ugrade.robot
index 12d036028..b0855ced7 100644
--- a/ods_ci/tests/Tests/100__deploy/120__upgrades/122__post_ugrade.robot
+++ b/ods_ci/tests/Tests/100__deploy/120__upgrades/122__post_ugrade.robot
@@ -17,7 +17,8 @@ Resource ../../../Resources/Common.robot
 Resource    ../../../Resources/Page/OCPDashboard/Pods/Pods.robot
 Resource    ../../../Resources/Page/OCPDashboard/Builds/Builds.robot
 Resource    ../../../Resources/Page/HybridCloudConsole/OCM.robot
-
+Resource    ../../../Resources/Page/DistributedWorkloads/DistributedWorkloads.resource
+Resource    ../../../Resources/Page/DistributedWorkloads/WorkloadMetricsUI.resource
 
 *** Variables ***
 ${S_SIZE}    25
@@ -30,6 +31,7 @@ ${PRJ_DESCRIPTION}= project used for model serving tests
 ${MODEL_NAME}=    test-model
 ${MODEL_CREATED}=    ${FALSE}
 ${RUNTIME_NAME}=    Model Serving Test
+${DW_PROJECT_CREATED}=    False
 
 
 *** Test Cases ***
@@ -145,6 +147,39 @@ Verify Custom Runtime Exists After Upgrade
     Delete Serving Runtime Template From CLI By Runtime Name OR Display Name    runtime_name=caikit-runtime
     [Teardown]    Dashboard Test Teardown
 
+Verify Ray Cluster Exists And Monitor Workload Metrics By Submitting Ray Job After Upgrade
+    [Documentation]    check the Ray Cluster exists , submit ray job and verify resource usage after upgrade
+    [Tags]    Upgrade
+    ${PRJ_UPGRADE}    Set Variable    test-ns-rayupgrade
+    ${LOCAL_QUEUE}    Set Variable    local-queue-mnist
+    ${JOB_NAME}    Set Variable    mnist
+    Run Codeflare Upgrade Tests    TestMnistJobSubmit
+    Set Global Variable    ${DW_PROJECT_CREATED}    True
+    Launch Dashboard    ${TEST_USER.USERNAME}    ${TEST_USER.PASSWORD}    ${TEST_USER.AUTH_TYPE}
+    ...    ${ODH_DASHBOARD_URL}    ${BROWSER.NAME}    ${BROWSER.OPTIONS}
+    Open Distributed Workload Metrics Home Page
+    Select Distributed Workload Project By Name    ${PRJ_UPGRADE}
+    Select Refresh Interval    15 seconds
+    Wait Until Element Is Visible    ${DISTRIBUITED_WORKLOAD_RESOURCE_METRICS_TITLE_XP}    timeout=20
+    Wait Until Element Is Visible    xpath=//*[text()="Running"]    timeout=30
+
+    ${cpu_requested} =    Get CPU Requested    ${PRJ_UPGRADE}    ${LOCAL_QUEUE}
+    ${memory_requested} =    Get Memory Requested    ${PRJ_UPGRADE}    ${LOCAL_QUEUE}    Upgrade
+    Check Requested Resources Chart    ${PRJ_UPGRADE}    ${cpu_requested}    ${memory_requested}
+    Check Requested Resources    ${PRJ_UPGRADE}    ${CPU_SHARED_QUOTA}
+    ...    ${MEMEORY_SHARED_QUOTA}    ${cpu_requested}    ${memory_requested}    RayCluster
+
+    Check Distributed Workload Resource Metrics Status    ${JOB_NAME}    Running
+    Check Distributed Worklaod Status Overview    ${JOB_NAME}    Running
+    ...    All pods were ready or succeeded since the workload admission
+
+    Click Button    ${PROJECT_METRICS_TAB_XP}
+    Check Distributed Workload Resource Metrics Chart    ${PRJ_UPGRADE}    ${cpu_requested}
+    ...    ${memory_requested}    RayCluster    ${JOB_NAME}
+
+    [Teardown]    Codeflare Upgrade Tests Teardown    ${PRJ_UPGRADE}    ${DW_PROJECT_CREATED}
+
+
 *** Keywords ***
 Dashboard Suite Setup
     [Documentation]    Basic suite setup