Skip to content

Commit

Permalink
Merge branch 'master' into fix/macos-mktemp
Browse files Browse the repository at this point in the history
  • Loading branch information
lugi0 authored Jun 12, 2024
2 parents 345fcfe + 6dfb73a commit 1ab4ddb
Show file tree
Hide file tree
Showing 20 changed files with 837 additions and 67 deletions.
2 changes: 1 addition & 1 deletion ods_ci/tasks/Resources/RHODS_OLM/install/oc_install.robot
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ ${SERVICEMESH_OP_NAME}= servicemeshoperator
${SERVICEMESH_SUB_NAME}= servicemeshoperator
${AUTHORINO_OP_NAME}= authorino-operator
${AUTHORINO_SUB_NAME}= authorino-operator
${AUTHORINO_CHANNEL_NAME}= managed-services
${AUTHORINO_CHANNEL_NAME}= tech-preview-v1
${RHODS_CSV_DISPLAY}= Red Hat OpenShift AI
${ODH_CSV_DISPLAY}= Open Data Hub Operator
${CUSTOM_MANIFESTS}= ${EMPTY}
Expand Down
2 changes: 2 additions & 0 deletions ods_ci/test-variables.yml.example
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,5 @@ APPLICATIONS_NAMESPACE: redhat-ods-applications
MONITORING_NAMESPACE: redhat-ods-monitoring
OPERATOR_NAMESPACE: redhat-ods-operator
NOTEBOOKS_NAMESPACE: rhods-notebooks
PIP_INDEX_URL: https://pypi.org/simple
PIP_TRUSTED_HOST: pypi.org
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,11 @@
"vllm-runtime": {
"completions_response_text": "{\"object\":\"text_completion\",\"created\":,\"model\":\"granite-8b-code-base\",\"choices\":[{\"index\":0,\"text\":\" city in California, one of the most populous cities in the United States\",\"logprobs\":null,\"finish_reason\":\"length\",\"stop_reason\":null}],\"usage\":{\"prompt_tokens\":5,\"total_tokens\":20,\"completion_tokens\":16}}"
}
},
"e5-mistral-7b":{
"vllm-runtime": {
"embeddings_response_text": "The actual embedded output is too large, and we don't check or match the response"
}
}
}
},
Expand Down
12 changes: 12 additions & 0 deletions ods_ci/tests/Resources/Files/llm/runtime_query_formats.json
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,18 @@
"response_text": "text"
}
}
},
"embeddings" : {
"http": {
"endpoint": "v1/embeddings",
"header": "Content-Type:application/json",
"body": "{'encoding_format': 'float', 'model': '${model_name}','input': '${query_text}'}",
"response_fields_map": {
"response": "data",
"completion_tokens": "completion_tokens",
"response_text": "embedding"
}
}
}
},
"containers": ["kserve-container"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ spec:
- /models-cache
- --port
- "8080"
image: quay.io/opendatahub/vllm:stable-e392b03
image: quay.io/modh/vllm@sha256:60f335015eff8c99508ff421c80f5f7b23b1310d87b0d4086b6f76f9a136b5a4
name: kserve-container
command:
- python3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ spec:
- /models-cache
- --port
- "8080"
image: quay.io/opendatahub/vllm:stable-e392b03
image: quay.io/modh/vllm@sha256:60f335015eff8c99508ff421c80f5f7b23b1310d87b0d4086b6f76f9a136b5a4
name: kserve-container
command:
- python3
Expand Down
40 changes: 20 additions & 20 deletions ods_ci/tests/Resources/Files/llm/vllm/vllm_servingruntime.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
apiVersion: serving.kserve.io/v1alpha1
kind: ServingRuntime
metadata:
metadata:
name: kserve-vllm
namespace: vllm-gpt2
spec:
annotations:
spec:
annotations:
sidecar.istio.io/inject: "true"
sidecar.istio.io/rewriteAppHTTPProbers: "true"
serving.knative.openshift.io/enablePassthrough: "true"
Expand All @@ -13,46 +13,46 @@ spec:
prometheus.io/port: '8080'
prometheus.io/path: "/metrics/"
multiModel: false
supportedModelFormats:
supportedModelFormats:
- name: vLLM
autoSelect: true
containers:
containers:
- name: kserve-container
#image: kserve/vllmserver:latest
image: quay.io/opendatahub/vllm:stable
startupProbe:
httpGet:
image: quay.io/modh/vllm@sha256:60f335015eff8c99508ff421c80f5f7b23b1310d87b0d4086b6f76f9a136b5a4
startupProbe:
httpGet:
port: 8080
path: /health
# Allow 12 minutes to start
failureThreshold: 24
periodSeconds: 30
readinessProbe:
httpGet:
readinessProbe:
httpGet:
port: 8080
path: /health
periodSeconds: 30
timeoutSeconds: 5
livenessProbe:
httpGet:
livenessProbe:
httpGet:
port: 8080
path: /health
periodSeconds: 100
timeoutSeconds: 8
terminationMessagePolicy: "FallbackToLogsOnError"
terminationGracePeriodSeconds: 120
args:
args:
- --port
- "8080"
- --model
- /mnt/models/gpt2
- --served-model-name
- "gpt2"
command:
command:
- python3
- -m
- vllm.entrypoints.openai.api_server
env:
env:
- name: STORAGE_URI
value: pvc://vlmm-gpt2-claim/
- name: HF_HUB_CACHE
Expand All @@ -62,16 +62,16 @@ spec:
- name: NUM_GPUS
value: "1"
- name: CUDA_VISIBLE_DEVICES
value: "0"
ports:
value: "0"
ports:
- containerPort: 8080
protocol: TCP
resources:
limits:
resources:
limits:
cpu: "4"
memory: 8Gi
nvidia.com/gpu: "1"
requests:
requests:
cpu: "1"
memory: 4Gi
nvidia.com/gpu: "1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# ConfigMap holding the pip settings (index URL and trusted host) as plain
# key/value data under the keys pip_index_url and pip_trusted_host.
# NOTE(review): the namespace below looks like a placeholder - confirm it is
# replaced with the target project before this manifest is applied.
apiVersion: v1
kind: ConfigMap
metadata:
name: ds-pipeline-custom-env-vars
namespace: my-project
data:
# Package index URL handed to pip.
pip_index_url: 'https://pypi.org/simple'
# Host that pip should treat as trusted.
pip_trusted_host: pypi.org
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""Hello world pipeline for pip_index_url clusters
This example sets the pip index URL for a pipeline task, obtaining the
value from a ConfigMap, so that the pipeline can run in an environment
that requires a custom pip index URL.
The pipeline reads the values from a ConfigMap (ds-pipeline-custom-env-vars)
and creates the environment variables PIP_INDEX_URL and PIP_TRUSTED_HOST
in the pipeline task.
Note: the YAML file produced by compiling this pipeline only uses
PIP_INDEX_URL (this is a limitation of kfp 2.7.0). The compiled YAML file
must be edited manually so that pip also uses PIP_TRUSTED_HOST.
"""
from kfp import compiler, dsl
from kfp import kubernetes

# Container image used as base_image for the pipeline component below.
# It is referenced by sha256 digest rather than by tag.
common_base_image = "registry.redhat.io/ubi8/python-39@sha256:3523b184212e1f2243e76d8094ab52b01ea3015471471290d011625e1763af61"


# '$PIP_INDEX_URL' is passed as a literal string on purpose: it ends up in the
# generated pip install command and is expanded by the shell inside the task
# container, where the variable is expected to be set.
@dsl.component(base_image=common_base_image,
               pip_index_urls=['$PIP_INDEX_URL'])
def print_message(message: str):
    """Prints a message together with the pip settings of the task.

    Args:
        message: Text to print between the two separator lines.

    Raises:
        KeyError: If PIP_INDEX_URL or PIP_TRUSTED_HOST is not set in the
            environment of the running task.
    """
    # The docstring has to be the first statement of the body; placed after
    # the import it was only a discarded string expression.
    # The import stays inside the function so that the function body is
    # self-contained when it runs in the task container.
    import os

    print("------------------------------------------------------------------")
    print(message)
    print('pip_index_url:' + os.environ['PIP_INDEX_URL'])
    print('pip_trusted_host:' + os.environ['PIP_TRUSTED_HOST'])
    print("------------------------------------------------------------------")


# Single-task pipeline: runs print_message with caching disabled and exposes
# two keys of the ds-pipeline-custom-env-vars ConfigMap to the task as the
# environment variables PIP_INDEX_URL and PIP_TRUSTED_HOST.
@dsl.pipeline(name="hello-world-pipeline", description="Pipeline that prints a hello message")
def hello_world_pipeline(message: str = "Hello world"):
    env_config_map = 'ds-pipeline-custom-env-vars'
    # (ConfigMap key, environment variable) pairs, registered one call per
    # pair and in this order.
    key_to_env_var = (
        ('pip_index_url', 'PIP_INDEX_URL'),
        ('pip_trusted_host', 'PIP_TRUSTED_HOST'),
    )

    task = print_message(message=message)
    task.set_caching_options(False)

    for config_key, env_var in key_to_env_var:
        kubernetes.use_config_map_as_env(task,
                                         config_map_name=env_config_map,
                                         config_map_key_to_env={config_key: env_var})


if __name__ == "__main__":
    import os

    # Write the compiled pipeline next to this script as
    # <script name>_compiled.yaml. Only the file extension is swapped;
    # str.replace would also rewrite any ".py" occurring earlier in the path
    # (for example in a directory name).
    compiled_path = os.path.splitext(__file__)[0] + "_compiled.yaml"
    compiler.Compiler().compile(hello_world_pipeline,
                                package_path=compiled_path)
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# PIPELINE DEFINITION
# Name: hello-world-pipeline
# Description: Pipeline that prints a hello message
# Inputs:
# message: str [Default: 'Hello world']
components:
comp-print-message:
executorLabel: exec-print-message
inputDefinitions:
parameters:
message:
parameterType: STRING
deploymentSpec:
executors:
exec-print-message:
container:
args:
- --executor_input
- '{{$}}'
- --function_to_execute
- print_message
command:
- sh
- -c
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location --index-url $PIP_INDEX_URL\
\ --trusted-host $PIP_TRUSTED_HOST 'kfp==2.7.0' '--no-deps' 'typing-extensions>=3.7.4,<5;\
\ python_version<\"3.9\"' && \"$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
_KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
'
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
\ *\n\ndef print_message(message: str):\n import os\n \"\"\"Prints\
\ a message\"\"\"\n print(\"------------------------------------------------------------------\"\
)\n print(message)\n print('pip_index_url:' + os.environ['PIP_INDEX_URL'])\n\
\ print('pip_trusted_host:' + os.environ['PIP_TRUSTED_HOST'])\n print(\"\
------------------------------------------------------------------\")\n\n"
image: registry.redhat.io/ubi8/python-39@sha256:3523b184212e1f2243e76d8094ab52b01ea3015471471290d011625e1763af61
pipelineInfo:
description: Pipeline that prints a hello message
name: hello-world-pipeline
root:
dag:
tasks:
print-message:
cachingOptions: {}
componentRef:
name: comp-print-message
inputs:
parameters:
message:
componentInputParameter: message
taskInfo:
name: print-message
inputDefinitions:
parameters:
message:
defaultValue: Hello world
isOptional: true
parameterType: STRING
schemaVersion: 2.1.0
sdkVersion: kfp-2.7.0
---
platforms:
kubernetes:
deploymentSpec:
executors:
exec-print-message:
configMapAsEnv:
- configMapName: ds-pipeline-custom-env-vars
keyToEnv:
- configMapKey: pip_index_url
envVar: PIP_INDEX_URL
- configMapName: ds-pipeline-custom-env-vars
keyToEnv:
- configMapKey: pip_trusted_host
envVar: PIP_TRUSTED_HOST
Loading

0 comments on commit 1ab4ddb

Please sign in to comment.