From 414b217ad06367376dcb37e23cbb15147f4b0979 Mon Sep 17 00:00:00 2001 From: Robert Love Date: Tue, 31 Dec 2024 06:52:37 -0500 Subject: [PATCH] nfd-nvidia --- .../rules/kustomization.yaml | 1 + .../node-feature-discovery/rules/nvidia-gpu.yaml | 14 ++++++++++++++ .../nvidia-device-plugin/app/helmrelease.yaml | 3 +-- .../nvidia-device-plugin/app/kustomization.yaml | 1 - 4 files changed, 16 insertions(+), 3 deletions(-) create mode 100644 kubernetes/main/apps/kube-system/node-feature-discovery/rules/nvidia-gpu.yaml diff --git a/kubernetes/main/apps/kube-system/node-feature-discovery/rules/kustomization.yaml b/kubernetes/main/apps/kube-system/node-feature-discovery/rules/kustomization.yaml index 433f7a0633..743a8ba381 100644 --- a/kubernetes/main/apps/kube-system/node-feature-discovery/rules/kustomization.yaml +++ b/kubernetes/main/apps/kube-system/node-feature-discovery/rules/kustomization.yaml @@ -10,3 +10,4 @@ resources: - ./intel-gpu.yaml - ./vlan-device.yaml - ./barcode-device.yaml + - ./nvidia-gpu.yaml diff --git a/kubernetes/main/apps/kube-system/node-feature-discovery/rules/nvidia-gpu.yaml b/kubernetes/main/apps/kube-system/node-feature-discovery/rules/nvidia-gpu.yaml new file mode 100644 index 0000000000..3b0855b2dd --- /dev/null +++ b/kubernetes/main/apps/kube-system/node-feature-discovery/rules/nvidia-gpu.yaml @@ -0,0 +1,14 @@ +--- +apiVersion: nfd.k8s-sigs.io/v1alpha1 +kind: NodeFeatureRule +metadata: + name: nvidia-gpu-plugin +spec: + rules: + - name: nvidia.gpu + labels: + nvidia.feature.node.kubernetes.io/gpu: "true" + matchFeatures: + - feature: pci.device + matchExpressions: + vendor: { op: In, value: ["10de"] } diff --git a/kubernetes/main/apps/kube-system/nvidia-device-plugin/app/helmrelease.yaml b/kubernetes/main/apps/kube-system/nvidia-device-plugin/app/helmrelease.yaml index 5d9709f929..345dcf926e 100644 --- a/kubernetes/main/apps/kube-system/nvidia-device-plugin/app/helmrelease.yaml +++ b/kubernetes/main/apps/kube-system/nvidia-device-plugin/app/helmrelease.yaml @@ -24,7 +24,6 @@ spec: retries: 3 upgrade: - cleanupOnFail: true crds: CreateReplace remediation: retries: 3 @@ -32,7 +31,7 @@ spec: values: runtimeClassName: nvidia nodeSelector: - nvidia.com/gpu.present: "true" + nvidia.feature.node.kubernetes.io/gpu: "true" config: map: default: |- diff --git a/kubernetes/main/apps/kube-system/nvidia-device-plugin/app/kustomization.yaml b/kubernetes/main/apps/kube-system/nvidia-device-plugin/app/kustomization.yaml index ee83b23ef3..fad3625bfa 100644 --- a/kubernetes/main/apps/kube-system/nvidia-device-plugin/app/kustomization.yaml +++ b/kubernetes/main/apps/kube-system/nvidia-device-plugin/app/kustomization.yaml @@ -1,5 +1,4 @@ --- ---- # yaml-language-server: $schema=https://json.schemastore.org/kustomization apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization