# docker-compose.nim.yml

# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


services:

  # nginx-cache settings contributed by this file: default every upstream
  # route to the NIM containers declared below and start them beforehand.
  # NOTE(review): no image/build key is given here, so this entry presumably
  # extends a definition from another Compose file - confirm.
  nginx-cache:
    # Short-form depends_on: only controls start order of the listed services.
    depends_on:
      - nim-llm
      - nim-embed
    environment:
      # Route variables. Each one keeps the value from the host environment
      # when set and non-empty, otherwise falls back to the default after ":-".
      - 'NGINX_UPSTREAM_NVAI=${NGINX_UPSTREAM_NVAI:-http://nim-llm:8000}'
      - 'NGINX_UPSTREAM_NIM_LLM=${NGINX_UPSTREAM_NIM_LLM:-http://nim-llm:8000}'
      - 'NGINX_UPSTREAM_NIM_EMBED=${NGINX_UPSTREAM_NIM_EMBED:-http://nim-embed:8000}'

  # LLM NIM container. The image can be swapped through NGC_NIM_LLM_CONTAINER;
  # the default is the Llama 3.1 70B Instruct NIM, tag 1.3.0.
  nim-llm:
    image: '${NGC_NIM_LLM_CONTAINER:-nvcr.io/nim/meta/llama-3.1-70b-instruct:1.3.0}'
    runtime: nvidia
    # Increase the shared memory available to the container.
    shm_size: '16G'
    environment:
      # The container's NGC_API_KEY is taken from the host's NVIDIA_API_KEY.
      # The ":?" form aborts interpolation with the quoted message when that
      # variable is unset or empty.
      - 'NGC_API_KEY=${NVIDIA_API_KEY:?"NVIDIA_API_KEY is required"}'
    volumes:
      # Named volume mounted at /opt/nim/.cache inside the container.
      - 'nim-llm-cache:/opt/nim/.cache'
    ports:
      # Host port 8081 -> container port 8000.
      - '8081:8000'
    networks:
      # NOTE(review): app_network is not declared in this file; presumably it
      # comes from another Compose file merged with this one - confirm.
      - app_network
    deploy:
      resources:
        reservations:
          devices:
            # Reserve NVIDIA GPUs; the count defaults to 4 and can be
            # overridden with NIM_LLM_GPU_COUNT.
            - driver: nvidia
              capabilities:
                - gpu
              count: '${NIM_LLM_GPU_COUNT:-4}'

  # Embedding NIM container. The image can be swapped through
  # NGC_NIM_EMBED_CONTAINER; the default is nv-embedqa-e5-v5, tag 1.0.1.
  nim-embed:
    image: '${NGC_NIM_EMBED_CONTAINER:-nvcr.io/nim/nvidia/nv-embedqa-e5-v5:1.0.1}'
    runtime: nvidia
    # Increase the shared memory available to the container.
    shm_size: '16G'
    environment:
      # The container's NGC_API_KEY is taken from the host's NVIDIA_API_KEY.
      # The ":?" form aborts interpolation with the quoted message when that
      # variable is unset or empty.
      - 'NGC_API_KEY=${NVIDIA_API_KEY:?"NVIDIA_API_KEY is required"}'
    volumes:
      # Named volume mounted at /opt/nim/.cache inside the container.
      - 'nim-embedding-cache:/opt/nim/.cache'
    ports:
      # Host port 8082 -> container port 8000.
      - '8082:8000'
    networks:
      # NOTE(review): app_network is not declared in this file; presumably it
      # comes from another Compose file merged with this one - confirm.
      - app_network
    deploy:
      resources:
        reservations:
          devices:
            # Reserve NVIDIA GPUs; the count defaults to 1 and can be
            # overridden with NIM_EMBED_GPU_COUNT.
            - driver: nvidia
              capabilities:
                - gpu
              count: '${NIM_EMBED_GPU_COUNT:-1}'


# Named volumes mounted at /opt/nim/.cache by the NIM services above, both
# using the local volume driver. Entries are listed alphabetically.
volumes:
  nim-embedding-cache:
    driver: local
  nim-llm-cache:
    driver: local