# Skip to content

# k8s containerd LLAMA service test #354

# k8s containerd LLAMA service test

# k8s containerd LLAMA service test #354

# Workflow title as displayed in the Actions tab.
name: k8s containerd LLAMA service test

# Runs are grouped by workflow name plus the PR head branch (or the ref when
# there is no PR). A newer run in the same group cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
  cancel-in-progress: true
# Triggers: manual dispatch, pushes and pull requests targeting main
# (changes that only touch README.md files are ignored), and a daily schedule.
on:
  workflow_dispatch:
    inputs:
      # Required input offered on manual runs; pre-filled with 'info'.
      logLevel:
        description: 'Log level'
        required: true
        default: 'info'
  push:
    branches:
      - main
    paths-ignore:
      - '**/README.md'
  pull_request:
    branches:
      - main
    paths-ignore:
      - '**/README.md'
  schedule:
    # Minute 0, hour 0, every day ("*/1" in the day-of-month field matches
    # every day of the month).
    - cron: "0 0 */1 * *"
# Single job: set up containerd + crun + WasmEdge + Kubernetes on the runner,
# download a GGUF model, start the LLAMA API service and query it with curl.
jobs:
  run:
    # NOTE(review): GitHub has retired the hosted ubuntu-20.04 image. The
    # install scripts called below are not visible here, so the label is left
    # unchanged; confirm they work on a newer image before bumping it.
    runs-on: ubuntu-20.04
    name: Run ggml plugin example
    defaults:
      run:
        # Explicitly selecting bash makes every step run with `-eo pipefail`,
        # so a failed download in a `curl ... | bash` / `wget ... | bash`
        # pipeline fails the step instead of passing silently.
        shell: bash
    steps:
      - uses: actions/checkout@v4
        with:
          # Fetch the full history instead of a shallow clone.
          fetch-depth: 0

      # -y keeps apt non-interactive even on images that do not preconfigure
      # "assume yes"; without it a confirmation prompt aborts the step.
      - name: Install apt-get packages
        run: |
          sudo ACCEPT_EULA=Y apt-get update
          sudo ACCEPT_EULA=Y apt-get upgrade -y
          sudo ACCEPT_EULA=Y apt-get install -y git wget jq

      # The sed rewrite points the crun clone URL inside containerd/install.sh
      # at the second-state fork, branch enable-wasmedge-plugin, before the
      # script is piped to bash.
      - name: Install containerd, WasmEdge, and crun with support of plugins and nn-preload
        run: |
          sed 's|https://github.com/containers/crun|-b enable-wasmedge-plugin https://github.com/second-state/crun|g' containerd/install.sh | bash

      # Installer output goes to k8s.log; it is printed by a later step.
      - name: Installing and starting k8s
        run: |
          bash kubernetes_containerd/install.sh > k8s.log 2>&1

      - name: Installing wasi_nn-ggml plugin and copy sys's dependencies into same path for container environment
        run: |
          curl -sSf https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh | bash -s -- --plugins wasi_nn-ggml
          wget -qO- https://raw.githubusercontent.com/second-state/runwasi/main/release/utils/copy_sys_dependencies.sh | bash -s $HOME/.wasmedge/plugin/libwasmedgePluginWasiNN.so $HOME/.wasmedge/plugin/

      # -f makes an HTTP error fail the step instead of saving the error page
      # under the model's file name.
      - name: Download llm model
        run: |
          curl -fLO https://huggingface.co/second-state/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_M.gguf

      # NOTE(review): fixed 20-minute wait, presumably to let the cluster
      # started above become ready; consider polling for readiness instead.
      - name: Sleep for 1200s
        run: sleep 1200s

      # Runs unless the job was cancelled, so the setup log is still printed
      # when an earlier step failed.
      - name: Dump the log of k8s setup
        if: ${{ !cancelled() }}
        run: |
          cat k8s.log

      # A failure here does not fail the job, so the following steps still
      # run. Output is appended to dump.log and printed by the last step.
      # NOTE(review): this also means a broken service cannot turn the job
      # red; confirm that is intended.
      - name: Run llm api service in k8s
        continue-on-error: true
        run: |
          bash k8s_containerd_llama/llama_server_application.sh >> dump.log 2>&1

      # Queries the service on localhost:8080: the model list, then two chat
      # completions pretty-printed with jq. Also non-fatal (see note above).
      - name: Test API server pod was created using the kubectl run command.
        continue-on-error: true
        run: |
          curl -X POST http://localhost:8080/v1/models -H 'accept:application/json'
          curl -X POST http://localhost:8080/v1/chat/completions -H 'accept:application/json' -H 'Content-Type: application/json' -d '{"messages":[{"role":"system", "content": "You are a helpful assistant."}, {"role":"user", "content": "Who is Robert Oppenheimer?"}], "model":"llama-2-chat"}' | jq .
          curl -X POST http://localhost:8080/v1/chat/completions -H 'accept:application/json' -H 'Content-Type: application/json' -d '{"messages":[{"role":"system", "content": "You are a helpful assistant."}, {"role":"user", "content": "What new discoveries from the James Webb Space Telescope can I tell my nine-year-old about?"}], "model":"llama-2-chat"}' | jq .

      - name: Display crun and wasmedge version
        run: |
          crun --version
          wasmedge --version

      # Runs unless the job was cancelled, so the service log is still printed
      # when an earlier step failed.
      - name: Dump the log of execution
        if: ${{ !cancelled() }}
        run: |
          cat dump.log