-
Notifications
You must be signed in to change notification settings - Fork 32
/
Copy pathdocker-compose.nim.yml
executable file
·76 lines (69 loc) · 2.22 KB
/
docker-compose.nim.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# NIM overlay: declares two locally hosted NIM containers (LLM and embedding)
# and points the nginx-cache upstream variables at them.
# NOTE(review): nginx-cache has no image here and app_network is not declared
# in the visible file — presumably both come from a base compose file that
# this one is merged with; confirm.
services:
  nginx-cache:
    environment:
      # Route Variables
      # Each upstream defaults to one of the NIM services declared below and
      # can be overridden from the host environment.
      - NGINX_UPSTREAM_NVAI=${NGINX_UPSTREAM_NVAI:-http://nim-llm:8000}
      - NGINX_UPSTREAM_NIM_LLM=${NGINX_UPSTREAM_NIM_LLM:-http://nim-llm:8000}
      - NGINX_UPSTREAM_NIM_EMBED=${NGINX_UPSTREAM_NIM_EMBED:-http://nim-embed:8000}
    # Start the NIM containers before the proxy that forwards to them.
    depends_on:
      - nim-llm
      - nim-embed

  nim-llm:
    # Image is overridable through NGC_NIM_LLM_CONTAINER.
    image: ${NGC_NIM_LLM_CONTAINER:-nvcr.io/nim/meta/llama-3.1-70b-instruct:1.3.0}
    # Increase the shared memory available to the container
    shm_size: 16G
    runtime: nvidia
    deploy:
      resources:
        reservations:
          devices:
            # Reserve NIM_LLM_GPU_COUNT NVIDIA GPUs (default 4).
            - driver: nvidia
              count: ${NIM_LLM_GPU_COUNT:-4}
              capabilities: [gpu]
    networks:
      - app_network
    environment:
      # Passed into the container as NGC_API_KEY; interpolation fails with the
      # given message when NVIDIA_API_KEY is unset or empty.
      - NGC_API_KEY=${NVIDIA_API_KEY:?"NVIDIA_API_KEY is required"}
    volumes:
      # Named volume mounted at /opt/nim/.cache — presumably keeps downloaded
      # model files across container restarts; confirm.
      - nim-llm-cache:/opt/nim/.cache
    ports:
      # Host port 8081 -> container port 8000.
      - "8081:8000"

  nim-embed:
    # Image is overridable through NGC_NIM_EMBED_CONTAINER.
    image: ${NGC_NIM_EMBED_CONTAINER:-nvcr.io/nim/nvidia/nv-embedqa-e5-v5:1.0.1}
    # Increase the shared memory available to the container
    shm_size: 16G
    runtime: nvidia
    deploy:
      resources:
        reservations:
          devices:
            # Reserve NIM_EMBED_GPU_COUNT NVIDIA GPUs (default 1).
            - driver: nvidia
              count: ${NIM_EMBED_GPU_COUNT:-1}
              capabilities: [gpu]
    networks:
      - app_network
    environment:
      # Passed into the container as NGC_API_KEY; interpolation fails with the
      # given message when NVIDIA_API_KEY is unset or empty.
      - NGC_API_KEY=${NVIDIA_API_KEY:?"NVIDIA_API_KEY is required"}
    volumes:
      # Named volume mounted at /opt/nim/.cache — presumably keeps downloaded
      # model files across container restarts; confirm.
      - nim-embedding-cache:/opt/nim/.cache
    ports:
      # Host port 8082 -> container port 8000.
      - "8082:8000"

# Named volumes backing the /opt/nim/.cache mounts of the nim-llm and
# nim-embed services; both use the local volume driver.
volumes:
  nim-llm-cache:
    driver: local
  nim-embedding-cache:
    driver: local