# k8s containerd LLAMA service test #354
# NOTE: the web viewer flagged this file as containing bidirectional Unicode
# text that may be interpreted or compiled differently than it appears.
# To review, open the file in an editor that reveals hidden Unicode characters.
# Display name of the workflow.
name: k8s containerd LLAMA service test

# Runs are grouped by workflow name plus PR head ref (or pushed ref when there
# is no head ref); starting a new run cancels the one already in progress for
# the same group.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
  cancel-in-progress: true
# Triggers: manual dispatch, pushes and pull requests targeting main
# (changes that only touch README.md files are ignored), and a schedule.
on:
  workflow_dispatch:
    inputs:
      # Manual-run input; no step in this workflow references it.
      logLevel:
        description: 'Log level'
        required: true
        default: 'info'
  push:
    branches: [main]
    paths-ignore:
      - '**/README.md'
  pull_request:
    branches: [main]
    paths-ignore:
      - '**/README.md'
  schedule:
    # Minute 0, hour 0, every day of the month: once per day at midnight.
    - cron: "0 0 */1 * *"
# Single job: installs containerd, crun (WasmEdge plugin branch), Kubernetes
# and the wasi_nn-ggml plugin, downloads a Llama 2 GGUF model, starts the LLM
# API service in k8s and sends it a few HTTP requests.
jobs:
  run:
    # NOTE(review): the ubuntu-20.04 hosted runner image has been retired by
    # GitHub; confirm the install scripts work on a newer image before bumping.
    runs-on: ubuntu-20.04
    name: Run ggml plugin example
    steps:
      - uses: actions/checkout@v4
        with:
          # 0 = fetch the full history instead of a shallow clone.
          fetch-depth: 0

      - name: Install apt-get packages
        # -y keeps upgrade/install non-interactive regardless of how the
        # runner image configures apt.
        run: |
          sudo ACCEPT_EULA=Y apt-get update
          sudo ACCEPT_EULA=Y apt-get upgrade -y
          sudo ACCEPT_EULA=Y apt-get install -y git wget jq

      - name: Install containerd, WasmEdge, and crun with support of plugins and nn-preload
        # Rewrites the crun URL in containerd/install.sh so that the
        # enable-wasmedge-plugin branch of second-state/crun is used instead
        # of containers/crun, then executes the patched script.
        run: |
          sed 's|https://github.com/containers/crun|-b enable-wasmedge-plugin https://github.com/second-state/crun|g' containerd/install.sh | bash

      - name: Installing and starting k8s
        # stdout and stderr go to k8s.log, which a later step prints.
        run: |
          bash kubernetes_containerd/install.sh > k8s.log 2>&1

      - name: Installing wasi_nn-ggml plugin and copy sys's dependencies into same path for container environment
        run: |
          curl -sSf https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh | bash -s -- --plugins wasi_nn-ggml
          wget -qO- https://raw.githubusercontent.com/second-state/runwasi/main/release/utils/copy_sys_dependencies.sh | bash -s $HOME/.wasmedge/plugin/libwasmedgePluginWasiNN.so $HOME/.wasmedge/plugin/

      - name: Download llm model
        run: |
          curl -LO https://huggingface.co/second-state/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_M.gguf

      - name: Sleep for 1200s
        # 20-minute pause; presumably gives the cluster time to become
        # ready -- TODO confirm whether a readiness check could replace it.
        run: sleep 1200s
        shell: bash

      - name: Dump the log of k8s setup
        run: |
          cat k8s.log

      - name: Run llm api service in k8s
        # A failure here does not fail the job; output is appended to
        # dump.log and printed by the final step.
        continue-on-error: true
        run: |
          bash k8s_containerd_llama/llama_server_application.sh >> dump.log 2>&1

      - name: Test API server pod was created using the kubectl run command.
        # Best-effort smoke test: request failures do not fail the job.
        continue-on-error: true
        run: |
          curl -X POST http://localhost:8080/v1/models -H 'accept:application/json'
          curl -X POST http://localhost:8080/v1/chat/completions -H 'accept:application/json' -H 'Content-Type: application/json' -d '{"messages":[{"role":"system", "content": "You are a helpful assistant."}, {"role":"user", "content": "Who is Robert Oppenheimer?"}], "model":"llama-2-chat"}' | jq .
          curl -X POST http://localhost:8080/v1/chat/completions -H 'accept:application/json' -H 'Content-Type: application/json' -d '{"messages":[{"role":"system", "content": "You are a helpful assistant."}, {"role":"user", "content": "What new discoveries from the James Webb Space Telescope can I tell my nine-year-old about?"}], "model":"llama-2-chat"}' | jq .

      - name: Display crun and wasmedge version
        run: |
          crun --version
          wasmedge --version

      - name: Dump the log of execution
        run: |
          cat dump.log