From 50f0110842fd8cd009c4d461e1ef2b8b5ac7863e Mon Sep 17 00:00:00 2001 From: nadavbuc Date: Sun, 7 Apr 2024 11:40:45 +0300 Subject: [PATCH 01/26] Checkpoint --- charts/karpenter_nodes/Chart.yaml | 7 + .../nodegroups_example_values.yaml | 8 + .../karpenter_nodes/templates/nodeclass.yaml | 86 +++++++ .../karpenter_nodes/templates/nodepool.yaml | 221 ++++++++++++++++++ .../userdata_example_values.yaml | 36 +++ charts/karpenter_nodes/values.yaml | 112 +++++++++ 6 files changed, 470 insertions(+) create mode 100644 charts/karpenter_nodes/Chart.yaml create mode 100644 charts/karpenter_nodes/nodegroups_example_values.yaml create mode 100644 charts/karpenter_nodes/templates/nodeclass.yaml create mode 100644 charts/karpenter_nodes/templates/nodepool.yaml create mode 100644 charts/karpenter_nodes/userdata_example_values.yaml create mode 100644 charts/karpenter_nodes/values.yaml diff --git a/charts/karpenter_nodes/Chart.yaml b/charts/karpenter_nodes/Chart.yaml new file mode 100644 index 0000000..96de2cd --- /dev/null +++ b/charts/karpenter_nodes/Chart.yaml @@ -0,0 +1,7 @@ +apiVersion: v2 +name: karpenter_nodes +description: A Helm chart for generating NodeClasses and NodePools for Karpenter +type: application + +version: 0.0.1 +appVersion: "0.34.0" diff --git a/charts/karpenter_nodes/nodegroups_example_values.yaml b/charts/karpenter_nodes/nodegroups_example_values.yaml new file mode 100644 index 0000000..7beac00 --- /dev/null +++ b/charts/karpenter_nodes/nodegroups_example_values.yaml @@ -0,0 +1,8 @@ +excludeInstanceSize: + - metal + +nodeGroups: + nodes-defaults: + instances: {} + nodes-test: + instances: {} diff --git a/charts/karpenter_nodes/templates/nodeclass.yaml b/charts/karpenter_nodes/templates/nodeclass.yaml new file mode 100644 index 0000000..ae85571 --- /dev/null +++ b/charts/karpenter_nodes/templates/nodeclass.yaml @@ -0,0 +1,86 @@ +{{- range $k, $v := $.Values.nodeGroups }} +{{- $_ := set $ "key" $k }} +{{- $_ := set $ "value" $v }} +--- +apiVersion: 
karpenter.k8s.aws/{{ $.Values.ApiVersion }} +kind: EC2NodeClass +metadata: + name: "{{ $k }}-{{ $v.instances.architecture | default $.Values.instances.architecture }}" +spec: + #IAM Role or Instance Profile to be attached to the instance + {{- if hasKey $v "IamRole" }} + role: {{ $v.IamRole }} + {{- else if hasKey $v "IamInstanceProfile" }} + instanceProfile: {{ $v.IamInstanceProfile }} + {{- else if hasKey $.Values "IamRole" }} + role: {{ $.Values.IamRole }} + {{- else if hasKey $.Values "IamInstanceProfile" }} + instanceProfile: {{ $.Values.IamInstanceProfile }} + {{- else }} + {{- fail "nodeClass error: Either IamRole or IamInstanceProfile must be defined" }} + {{- end }} + #AMI Family (Can be overwritten with selector) + amiFamily: {{ $v.amiFamily | default $.Values.amiFamily }} + + amiSelectorTerms: + {{- if hasKey $v "amiSelectorTerms" }} + {{- toYaml $v.amiSelectorTerms | nindent 4 }} + {{- else }} + {{- toYaml $.Values.amiSelectorTerms | nindent 4 }} + {{- end }} + + #Instance Network Configuration + subnetSelectorTerms: + {{- if hasKey $v "subnetSelectorTerms" }} + {{- toYaml $v.subnetSelectorTerms | nindent 4 }} + {{- else }} + {{- toYaml $.Values.subnetSelectorTerms | nindent 4 }} + {{- end }} + securityGroupSelectorTerms: + {{- if hasKey $v "securityGroupSelectorTerms" }} + {{- toYaml $v.securityGroupSelectorTerms | nindent 4 }} + {{- else }} + {{- toYaml $.Values.securityGroupSelectorTerms | nindent 4 }} + {{- end }} + + #Instance Tags + tags: + cluster: {{ $.Values.clusterName }} + {{ $.Values.nodeGroupLabelName }}: {{ $v.nodeGroupLabel | default $k }} + {{ if or hasKey ($v "nodeTags") ($.Values "nodeTags") }} + {{- toYaml ($v.nodeTags | default $.Values.tags) | nindent 4 }} + {{- end }} + {{ if hasKey $v "additionalNodeTags" }} + {{- toYaml $v.tags | nindent 4 }} + {{- end }} + managed_by: "karpenter" + + #Instace Volumes + blockDeviceMappings: + {{- if hasKey $v "blockDeviceMappings" }} + {{- toYaml $v.blockDeviceMappings | nindent 4 }} + {{- else 
}} + {{- toYaml $.Values.blockDeviceMappings | nindent 4 }} + {{- end }} + + {{- if hasKey $v "instanceStorePolicy" }} + instanceStorePolicy: {{ $v.instanceStorePolicy }} + {{- else if hasKey $.Values "instanceStorePolicy" }} + instanceStorePolicy: {{ $.Values.instanceStorePolicy }} + {{- end }} + + #Instance Metadata + metadataOptions: + httpEndpoint: {{ $v.httpEndpoint | default $.Values.httpEndpoint }} + httpProtocolIPv6: {{ $v.httpProtocolIPv6 | default $.Values.httpProtocolIPv6 }} + httpPutResponseHopLimit: {{ $v.httpPutResponseHopLimit | default $.Values.httpPutResponseHopLimit }} + httpTokens: {{ $v.httpTokens | default $.Values.httpTokens }} + {{- if or (hasKey $v "userData") (hasKey $.Values "userData") }} + userData: | + {{- if hasKey $v "userData" }} + {{- tpl $.Values.userData $ | nindent 4 }} + {{- else if hasKey $.Values "userData" }} + {{- tpl $.Values.userData $ | nindent 4 }} + {{- end }} + {{- end}} +{{- end }} diff --git a/charts/karpenter_nodes/templates/nodepool.yaml b/charts/karpenter_nodes/templates/nodepool.yaml new file mode 100644 index 0000000..4bd8629 --- /dev/null +++ b/charts/karpenter_nodes/templates/nodepool.yaml @@ -0,0 +1,221 @@ +{{- range $k, $v := $.Values.nodeGroups }} +--- +apiVersion: karpenter.k8s.aws/{{ $.Values.ApiVersion }} +kind: EC2NodeClass +kind: NodePool +metadata: + name: "{{ $k }}-{{ $v.instances.architecture | default $.Values.instances.architecture }}" +spec: + template: + metadata: + labels: + {{ $.Values.nodeGroupLabelName }}: {{ $v.nodeGroupLabel | default $k }} + cluster: {{ $.Values.clusterName }} + {{- if hasKey $v "labels" }} + {{- range $labelName, $labelValue := $v.labels }} + {{ $labelName }}: {{ $labelValue }} + {{- end }} + {{- end }} + {{- if hasKey $v "annotations" }} + annotations: + {{- range $annotationName, $annotationValue := $v.annotations }} + {{ $annotationName }}: {{ $annotationValue }} + {{- end }} + {{- end }} + spec: + nodeClassRef: + {{- if hasKey $v "nodeClassRef" }} + {{ 
$v.nodeClassRef | toYaml | nindent 8 }} + {{- else }} + name: {{ $k }}-{{ $v.instances.architecture | default $.Values.instances.architecture }} + {{- end }} + {{- if or ($v.additionalTaints) (eq ($v.nodeLabelTaint | default $.Values.nodeLabelTaint) "true") (eq ($v.instances.architecture | default $.Values.instances.architecture) "arm64") }} + {{- if haskey $v.taints }} + taints: + {{- range $v.$taints }} + - key: {{ .key }} + value: {{ .value }} + effect: {{ .effect }} + {{- end }} + {{- end }} + {{- if hasKey $v "startupTaints" }} + startupTaints: + {{- range $v.startupTaints }} + - key: {{ .key }} + value: {{ .value }} + effect: {{ .effect }} + {{- end }} + {{- end }} + requirements: + - key: "karpenter.k8s.aws/instance-category" + operator: In + values: + {{- range $v.instances.categories | default $.Values.instances.categories }} + - {{ . }} + {{- end }} + - key: "karpenter.k8s.aws/instance-cpu" + operator: In + values: + {{- range $v.instances.cores | default $.Values.instances.cores }} + - {{ . | quote }} + {{- end }} + {{- if not $v.instances.noMinimumGeneration }} + - key: karpenter.k8s.aws/instance-generation + operator: Gt + values: {{ sub ($v.instances.minGeneration | default $.Values.instances.minGeneration) 1 | quote | list }} + {{- end }} + - key: "topology.kubernetes.io/zone" + operator: In + values: + {{- range $v.availabilityZones | default $.Values.availabilityZones }} + - {{ . }} + {{- end }} + - key: "kubernetes.io/arch" + operator: In + values: + {{- if eq ($v.instances.architecture | default $.Values.instances.architecture) "multiarch" }} + - amd64 + - arm64 + {{- else }} + - {{ $v.instances.architecture | default $.Values.instances.architecture }} + {{- end }} + - key: "karpenter.sh/capacity-type" + operator: In + values: + {{- range $v.instances.capacityType | default $.Values.instances.capacityType }} + - {{ . 
}} + {{- end }} + - key: kubernetes.io/os + operator: In + values: + {{- range $v.instances.operatingSystems | default $.Values.instances.operatingSystems }} + - {{ . }} + {{- end }} + {{- if or (hasKey $.Values "excludeFamilies") (hasKey $v "excludeFamilies") }} + - key: "karpenter.k8s.aws/instance-family" + operator: NotIn + values: + {{- if hasKey $v "excludeFamilies" }} + {{- range $v .excludeFamilies } + - {{ . }} + {{- end }} + {{- else }} + {{- range $.Values.excludeFamilies }} + - {{ . }} + {{- end }} + {{- end }} + {{- end }} + {{- if or (hasKey $.Values "excludeCpuManufacturer") (hasKey $v "excludeCpuManufacturer") }} + - key: "karpenter.k8s.aws/instance-cpu-manufacturer" + operator: NotIn + values: + {{- if hasKey $v "excludeCpuManufacturer" }} + {{- range $v .excludeCpuManufacturer }} + - {{ . }} + {{- end }} + {{- else }} + {{- range $.Values.excludeCpuManufacturer }} + - {{ . }} + {{- end }} + {{- end }} + {{- end }} + {{- if or (hasKey $.Values "excludeInstanceSize") (hasKey $v "excludeInstanceSize") }} + - key: "karpenter.k8s.aws/instance-size" + operator: NotIn + values: + {{- if hasKey $v "excludeInstanceSize" }} + {{- range $v .excludeInstanceSize }} + - {{ . }} + {{- end }} + {{- else }} + {{- range $.Values.excludeInstanceSize }} + - {{ . }} + {{- end }} + {{- end }} + {{- end }} + {{- if hasKey $v "capacitySpread" }} + - key: capacity-spread + operator: In + values: + {{- range untilStep (int $v.capacitySpread.start) (int (add1 $v.capacitySpread.end)) 1 }} + - "{{ . }}" + {{- end }} + {{- end }} + {{- if hasKey $v.instances "instanceTypes" }} + - key: node.kubernetes.io/instance-type + operator: In + values: {{- range $v.instances.instanceTypes }} + - {{ . -}} + {{- end }} + {{- end }} + kubelet: + {{- if or (hasKey $v.kubelet.clusterDNS) (hasKey $.Values.kubelet.clusterDNS) }} + clusterDNS: + {{- if hasKey $v.kubelet.clusterDNS }} + {{- range $v.kubelet.clusterDNS }} + {{ . 
}} + {{- end }} + {{- else }} + {{- range $.Values.kubelet.clusterDNS }} + {{ . }} + {{- end }} + {{- end }} + {{- end }} + systemReserved: + cpu: {{ $v.kubelet.systemReserved.cpu | default $.Values.kubelet.systemReserved.cpu }} + memory: {{ $v.kubelet.systemReserved.memory | default $.Values.kubelet.systemReserved.memory }} + ephemeral-storage: {{ $v.kubelet.systemReserved.ephemeralStorage | default $.Values.kubelet.systemReserved.ephemeralStorage }} + kubeReserved: + cpu: {{ $v.kubelet.kubeReserved.cpu | default $.Values.kubelet.kubeReserved.cpu }} + memory: {{ $v.kubelet.kubeReserved.memory | default $.Values.kubelet.kubeReserved.memory }} + ephemeral-storage: {{ $v.kubelet.kubeReserved.ephemeralStorage | default $.Values.kubelet.kubeReserved.ephemeralStorage }} + evictionHard: + memory.available: {{ $v.kubelet.evictionHard.memory.available | default $.Values.kubelet.evictionHard.memory.available }} + nodefs.available: {{ $v.kubelet.evictionHard.nodefs.available | default $.Values.kubelet.evictionHard.nodefs.available }} + nodefs.inodesFree: {{ $v.kubelet.evictionHard.nodefs.inodesFree | default $.Values.kubelet.evictionHard.nodefs.inodesFree }} + evictionSoft: + memory.available: {{ $v.kubelet.evictionSoft.memory.available | default $.Values.kubelet.evictionSoft.memory.available }} + nodefs.available: {{ $v.kubelet.evictionSoft.nodefs.available | default $.Values.kubelet.evictionSoft.nodefs.available }} + nodefs.inodesFree: {{ $v.kubelet.evictionSoft.nodefs.inodesFree | default $.Values.kubelet.evictionSoft.nodefs.inodesFree }} + imagefs.available: {{ $v.kubelet.evictionSoft.imagefs.available | default $.Values.kubelet.evictionSoft.imagefs.available }} + imagefs.inodesFree: {{ $v.kubelet.evictionSoft.imagefs.inodesFree | default $.Values.kubelet.evictionSoft.imagefs.inodesFree }} + pid.available: {{ $v.kubelet.evictionSoft.pid.available | default $.Values.kubelet.evictionSoft.pid.available }} + evictionSoftGracePeriod: + imagefs.available: {{ 
$v.kubelet.evictionSoftGracePeriod.imagefs.available | default $.Values.kubelet.evictionSoftGracePeriod.imagefs.available }} + imagefs.inodesFree: {{ $v.kubelet.evictionSoftGracePeriod.imagefs.inodesFree | default $.Values.kubelet.evictionSoftGracePeriod.imagefs.inodesFree }} + memory.available: {{ $v.kubelet.evictionSoftGracePeriod.memory.available | default $.Values.kubelet.evictionSoftGracePeriod.memory.available }} + nodefs.available: {{ $v.kubelet.evictionSoftGracePeriod.nodefs.available | default $.Values.kubelet.evictionSoftGracePeriod.nodefs.available }} + nodefs.inodesFree: {{ $v.kubelet.evictionSoftGracePeriod.nodefs.inodesFree | default $.Values.kubelet.evictionSoftGracePeriod.nodefs.inodesFree }} + pid.available: {{ $v.kubelet.evictionSoftGracePeriod.pid.available | default $.Values.kubelet.evictionSoftGracePeriod.pid.available }} + {{- if or (hasKey $v.kubelet.imageGCHighThresholdPercent) (hasKey $.Values.kubelet.imageGCHighThresholdPercent) }} + imageGCHighThresholdPercent: {{ $v.kubelet.imageGCHighThresholdPercent | default $.Values.kubelet.imageGCHighThresholdPercent }} + {{- end }} + {{- if or (hasKey $v.kubelet.imageGCLowThresholdPercent) (hasKey $.Values.kubelet.imageGCLowThresholdPercent) }} + imageGCLowThresholdPercent: {{ $v.kubelet.imageGCLowThresholdPercent | default $.Values.kubelet.imageGCLowThresholdPercent }} + {{- end }} + {{- if or (hasKey $v.kubelet.imageMinimumGCAge) (hasKey $.Values.kubelet.imageMinimumGCAge) }} + cpuCFSQuota: {{ $v.kubelet.cpuCFSQuota | default $.Values.kubelet.cpuCFSQuota }} + {{- end }} + {{- if or (hasKey $v.kubelet.cpuCFSQuota) (hasKey $.Values.kubelet.cpuCFSQuota) }} + podsPerCore: {{ $v.kubelet.podsPerCore | default $.Values.kubelet.podsPerCore }} + {{- end }} + {{- if or (hasKey $v.kubelet.podsPerCore) (hasKey $.Values.kubelet.podsPerCore) }} + maxPods: {{ $v.kubelet.maxPods | default $.Values.kubelet.maxPods }} + {{- end }} + disruption: + expireAfter: {{ $v.expireAfter | default $.Values.expireAfter }} + 
consolidationPolicy: {{$v.consolidationPolicy | default $.Values.consolidationPolicy}} + consolidateAfter: {{ $v.consolidateAfter | default $.Values.consolidateAfter }} + {{- end }} + {{- if $v.budgets }} + budgets: + {{- $v.budgets | toYaml | nindent 6 }} + {{- end }} + {{- if haskey $v.limits }} + limits: + {{- range $limitName, $limitValue := $v.limits }} + {{ $limitName }}: {{ $limitValue }} + {{- end }} + {{- end }} + weight: {{ $v.weight | default 1 }} +{{- end }} diff --git a/charts/karpenter_nodes/userdata_example_values.yaml b/charts/karpenter_nodes/userdata_example_values.yaml new file mode 100644 index 0000000..740ffff --- /dev/null +++ b/charts/karpenter_nodes/userdata_example_values.yaml @@ -0,0 +1,36 @@ +userData: | + CLUSTER_NAME={{ $.Values.clusterName }} + INSTANCEGROUP={{ .value.nodeGroupLabel | default .key }} + INSTANCE_ID=`/usr/bin/ec2-metadata --instance-id | awk '{print $2}'` + ID_SUFFIX=`/usr/bin/ec2-metadata --instance-id | awk '{print substr($0,length-3,4)}'` + HOSTNAME="${CLUSTER_NAME}-${INSTANCEGROUP}-$ID_SUFFIX" + hostname $HOSTNAME + echo $HOSTNAME > /etc/hostname + aws ec2 create-tags --resources $INSTANCE_ID --tags=Key=Name,Value=$HOSTNAME + sed -i "s/127.0.0.1 [0-9a-z-]*\s*localhost/127.0.0.1 $HOSTNAME localhost/" /etc/hosts + # Sysctl changes + {{ if ( .disableIPv6 | default $.Values.disableIPv6) }} + ## Disable IPv6 + cat < /etc/sysctl.d/10-disable-ipv6.conf + # disable ipv6 config + net.ipv6.conf.all.disable_ipv6 = 1 + net.ipv6.conf.default.disable_ipv6 = 1 + net.ipv6.conf.lo.disable_ipv6 = 1 + systemctl restart systemd-sysctl.service + EOF + {{ end }} + #Increase RegistryQPS + echo "$(jq '.registryPullQPS=100' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json + echo "$(jq '.registryBurst=200' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json + {{ if (.value.registryCache | default $.Values.registryCache) }} + #Registry Cache + mkdir -p 
/etc/containerd/certs.d/{{ .value.registry | default $.Values.registry }} + cat</etc/containerd/certs.d/{{ .value.registry | default $.Values.registry }}/hosts.toml + server = "https://{{ .value.registry | default $.Values.registry }}" + [host."{{ .value.registryHost | default $.Values.registryHost }}"] + capabilities = ["pull", "resolve"] + {{- if .value.registrySkipVerify | default $.Values.registrySkipVerify }} + skip_verify = true + {{- end }} + EOF + {{- end }} diff --git a/charts/karpenter_nodes/values.yaml b/charts/karpenter_nodes/values.yaml new file mode 100644 index 0000000..fe61fa6 --- /dev/null +++ b/charts/karpenter_nodes/values.yaml @@ -0,0 +1,112 @@ + +##Global Configuration +#Karpenter API Version in CRD +ApiVersion: v1beta1 + +clusterName: "" #My Cluster Name +amiFamily: AL2 #Bottlerocket #AL2023 +IamRole: "" #eks_nodes_role + +subnetSelectorTerms: [] +# - tags: +# cluster: eks-cluster +# karpenter.sh/discovery/eks-cluster: '*' +# - id: subnet-id +## Reusing same Tag Name +# - tags: +# Name: "eks-subnet-1" +# - tags: +# Name: "eks-subnet-2" + +securityGroupSelectorTerms: [] +# - tags: +# cluster: eks-cluster +# karpenter.sh/discovery/eks-cluster: '*' +# - name: my-security-group +# - id: sg-063d7acfb4b06c82c +## Reusing same Tag Name +# - tags: +# Name: "my-security-group-1" +# - tags: +# Name: "my-security-group-2" + +nodeGroupLabelName: nodegroup +operatingSystems: + - linux + +nodeTags: + team: devops + component: eks-karpenter-nodes + +availabilityZones: [] + +#MetaData Options +httpEndpoint: enabled +httpProtocolIPv6: disabled +httpPutResponseHopLimit: 2 +httpTokens: required + +#Storage +blockDeviceMappings: + - deviceName: /dev/xvda + ebs: + volumeSize: 100Gi + volumeType: gp3 + encrypted: false + deleteOnTermination: true + iops: 3000 + throughput: 125 + +registry: "registry-1.docker.io" +registryCache: true +registryHost: "" #"http://registry" +registrySkipVerify: true +disableIPv6: true +expireAfter: "720h" +consolidationPolicy: 
"WhenUnderutilized" +consolidateAfter: "5m" + +#Default Instance Sizing +instances: + minGeneration: 5 + architecture: "amd64" + categories: + - m + - r + - c + cores: + - "4" + - "8" + - "16" + capacityType: + - spot + - on-demand + + +kubelet: + systemReserved: + cpu: 250m + memory: 200Mi + ephemeral-storage: 2Gi + kubeReserved: + cpu: 250m + memory: 1Gi + ephemeral-storage: 4Gi + evictionHard: + memory.available: 768Mi + nodefs.available: 8% + nodefs.inodesFree: 8% + evictionSoft: + memory.available: 1280Mi + nodefs.available: 10% + nodefs.inodesFree: 15% + imagefs.available: 10% + imagefs.inodesFree: 10% + pid.available: 10% + evictionSoftGracePeriod: + imagefs.available: 10m0s + imagefs.inodesFree: 10m0s + memory.available: 5m0s + nodefs.available: 10m0s + nodefs.inodesFree: 10m0s + pid.available: 2m0s From a43675ae964158a438f1029466db72829f5bc73e Mon Sep 17 00:00:00 2001 From: nadavbuc Date: Mon, 8 Apr 2024 12:59:11 +0300 Subject: [PATCH 02/26] CheckPoint --- .github/PULL_REQUEST_TEMPLATE.md | 12 ++ .github/workflows/release.yml | 27 ++++ charts/karpenter_nodes/Chart.yaml | 1 - .../nodegroups_example_values.yaml | 69 ++++++++- charts/karpenter_nodes/templates/_helpers.tpl | 21 +++ .../karpenter_nodes/templates/headroom.yaml | 62 ++++++++ .../karpenter_nodes/templates/nodeclass.yaml | 20 +-- .../karpenter_nodes/templates/nodepool.yaml | 106 ++++++------- .../templates/priorityclass.yaml | 9 ++ .../karpenter_nodes/tests/nodeclass_test.yaml | 114 ++++++++++++++ .../tests/nodepool_nodes_default_test.yaml | 142 ++++++++++++++++++ .../tests/nodepool_nodes_workers_test.yaml | 141 +++++++++++++++++ .../tests/priorityclass_test.yaml | 21 +++ charts/karpenter_nodes/tests/values.yaml | 130 ++++++++++++++++ .../userdata_example_values.yaml | 7 + charts/karpenter_nodes/values.yaml | 99 +++++++----- robots.txt | 2 + 17 files changed, 872 insertions(+), 111 deletions(-) create mode 100644 .github/PULL_REQUEST_TEMPLATE.md create mode 100644 .github/workflows/release.yml 
create mode 100644 charts/karpenter_nodes/templates/_helpers.tpl create mode 100644 charts/karpenter_nodes/templates/headroom.yaml create mode 100644 charts/karpenter_nodes/templates/priorityclass.yaml create mode 100644 charts/karpenter_nodes/tests/nodeclass_test.yaml create mode 100644 charts/karpenter_nodes/tests/nodepool_nodes_default_test.yaml create mode 100644 charts/karpenter_nodes/tests/nodepool_nodes_workers_test.yaml create mode 100644 charts/karpenter_nodes/tests/priorityclass_test.yaml create mode 100644 charts/karpenter_nodes/tests/values.yaml create mode 100644 robots.txt diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..90ef1ba --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,12 @@ + + +_Provide a description of what has been changed_ + +### Checklist + +- [ ] README is updated with new configuration values *(if applicable)* [learn more](https://github.com/kedacore/charts/blob/main/CONTRIBUTING.md#documentation) +- [ ] Changes were throughly tested locally +- [ ] Changes are covered by Unit Tests +- [ ] Version is updated in `Chart.yaml` + +Fixes # diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..3421dc9 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,27 @@ +name: Release Charts + +on: + push: + branches: + - master + +jobs: + release: + permissions: + contents: write + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Configure Git + run: | + git config user.name "$GITHUB_ACTOR" + git config user.email "$GITHUB_ACTOR@users.noreply.github.com" + + - name: Run chart-releaser + uses: helm/chart-releaser-action@v1.6.0 + env: + CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}" diff --git a/charts/karpenter_nodes/Chart.yaml b/charts/karpenter_nodes/Chart.yaml index 96de2cd..4fce05e 100644 --- a/charts/karpenter_nodes/Chart.yaml +++ 
b/charts/karpenter_nodes/Chart.yaml @@ -4,4 +4,3 @@ description: A Helm chart for generating NodeClasses and NodePools for Karpenter type: application version: 0.0.1 -appVersion: "0.34.0" diff --git a/charts/karpenter_nodes/nodegroups_example_values.yaml b/charts/karpenter_nodes/nodegroups_example_values.yaml index 7beac00..8c5ae51 100644 --- a/charts/karpenter_nodes/nodegroups_example_values.yaml +++ b/charts/karpenter_nodes/nodegroups_example_values.yaml @@ -1,8 +1,71 @@ excludeInstanceSize: - metal +excludeFamilies: + - c6a + - m6a + - r6a + - c5a + - m5a + - r5a + - c6ad + - m6ad + - r6ad + - m5ad + - r5ad + - r5ad nodeGroups: - nodes-defaults: - instances: {} - nodes-test: + nodes-default: + blockDeviceMappings: + - deviceName: /dev/xvdb + ebs: + volumeSize: 100Gi + volumeType: gp3 + encrypted: false + deleteOnTermination: true + iops: 3000 + throughput: 125 instances: {} + nodeHeadRooms: + - size: small + count: 2 + labels: + testlabel1: label1 + testlabel2: label2 + taints: + - key: testtaint1 + effect: noSchedule + value: taint1 + - key: testtaint2 + effect: noSchedule + value: taint2 + nodes-workers: + consolidation: "false" + blockDeviceMappings: + - deviceName: /dev/xvda + ebs: + volumeSize: 100Gi + volumeType: gp3 + encrypted: false + deleteOnTermination: true + iops: 3000 + throughput: 125 + instances: + architecture: "arm64" + minGeneration: 5 + categories: + - t + - x + cores: + - "4" + capacityType: + - on-demand + subnets: + - workers-1a + - workers-1b + - workers-1c + securityGroups: + - workersonly + availabilityZones: + - eu-west-1a + - eu-west-1b diff --git a/charts/karpenter_nodes/templates/_helpers.tpl b/charts/karpenter_nodes/templates/_helpers.tpl new file mode 100644 index 0000000..cdbf84a --- /dev/null +++ b/charts/karpenter_nodes/templates/_helpers.tpl @@ -0,0 +1,21 @@ +{{/*Define headroom sizes*/}} +{{- define "headroom.sizing" -}} +{{- range $key, $val := .Args }} +{{- if eq $val "small" }} +cpu: "1" +memory: "4Gi" +{{- end }} +{{- if 
eq $val "medium" }} +cpu: "2" +memory: "8Gi" +{{- end }} +{{- if eq $val "large" }} +cpu: "4" +memory: "16Gi" +{{- end }} +{{- if eq $val "xlarge" }} +cpu: "8" +memory: "32Gi" +{{- end }} +{{- end }} +{{- end }} diff --git a/charts/karpenter_nodes/templates/headroom.yaml b/charts/karpenter_nodes/templates/headroom.yaml new file mode 100644 index 0000000..99f2722 --- /dev/null +++ b/charts/karpenter_nodes/templates/headroom.yaml @@ -0,0 +1,62 @@ +{{- range $k, $v := .Values.nodeGroups }} +{{- range $nhr := $v.headRoom }} +{{ $data := + dict "sv" $nhr.size +}} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: headroom-{{ $k }}-{{ $v.instances.architecture | default $.Values.instances.architecture }}-{{ $nhr.size }} + namespace: {{ $.Values.headRoomNamespace | default "karpenter" }} + labels: + k8s-app: headroom-{{ $k }}-{{ $v.instances.architecture | default $.Values.instances.architecture }}-{{ $nhr.size }} +spec: + replicas: {{ $nhr.count | default 1}} + selector: + matchLabels: + k8s-app: headroom-{{ $k }}-{{ $v.instances.architecture | default $.Values.instances.architecture }}-{{ $nhr.size }} + template: + metadata: + labels: + k8s-app: headroom-{{ $k }}-{{ $v.instances.architecture | default $.Values.instances.architecture }}-{{ $nhr.size }} + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: karpenter.sh/nodepool + operator: In + values: + - {{ $k }}-{{ $v.instances.architecture | default $.Values.instances.architecture }} + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: k8s-app + operator: In + values: + - headroom-{{ $k }}-{{ $v.instances.architecture | default $.Values.instances.architecture }}-{{ $nhr.size }} + {{- $nhr.antiAffinitySpec | toYaml | nindent 14 }} + topologyKey: kubernetes.io/hostname + {{- if hasKey $nhr "nameSpaces" }} + namespaces: + - {{ $.Values.headRoomNamespace | default 
"karpenter" }} + {{- range $nhr.nameSpaces }} + - {{ . }} + {{- end }} + {{- else }} + namespaces: {} + {{- end }} + tolerations: + - operator: Exists + containers: + - name: pause + image: registry.k8s.io/pause + resources: + requests: + {{- include "headroom.sizing" (merge (dict "Args" $data) . ) | indent 12 }} + priorityClassName: karpenter-headroom +{{- end }} +{{- end }} diff --git a/charts/karpenter_nodes/templates/nodeclass.yaml b/charts/karpenter_nodes/templates/nodeclass.yaml index ae85571..5af883e 100644 --- a/charts/karpenter_nodes/templates/nodeclass.yaml +++ b/charts/karpenter_nodes/templates/nodeclass.yaml @@ -7,7 +7,6 @@ kind: EC2NodeClass metadata: name: "{{ $k }}-{{ $v.instances.architecture | default $.Values.instances.architecture }}" spec: - #IAM Role or Instance Profile to be attached to the instance {{- if hasKey $v "IamRole" }} role: {{ $v.IamRole }} {{- else if hasKey $v "IamInstanceProfile" }} @@ -19,17 +18,13 @@ spec: {{- else }} {{- fail "nodeClass error: Either IamRole or IamInstanceProfile must be defined" }} {{- end }} - #AMI Family (Can be overwritten with selector) amiFamily: {{ $v.amiFamily | default $.Values.amiFamily }} - amiSelectorTerms: {{- if hasKey $v "amiSelectorTerms" }} {{- toYaml $v.amiSelectorTerms | nindent 4 }} {{- else }} {{- toYaml $.Values.amiSelectorTerms | nindent 4 }} {{- end }} - - #Instance Network Configuration subnetSelectorTerms: {{- if hasKey $v "subnetSelectorTerms" }} {{- toYaml $v.subnetSelectorTerms | nindent 4 }} @@ -42,34 +37,27 @@ spec: {{- else }} {{- toYaml $.Values.securityGroupSelectorTerms | nindent 4 }} {{- end }} - - #Instance Tags tags: cluster: {{ $.Values.clusterName }} {{ $.Values.nodeGroupLabelName }}: {{ $v.nodeGroupLabel | default $k }} - {{ if or hasKey ($v "nodeTags") ($.Values "nodeTags") }} - {{- toYaml ($v.nodeTags | default $.Values.tags) | nindent 4 }} + {{- if or (hasKey $v "nodeTags") (hasKey $.Values "nodeTags") }} + {{- toYaml ($v.nodeTags | default $.Values.nodeTags) | 
nindent 4 }} {{- end }} - {{ if hasKey $v "additionalNodeTags" }} + {{- if hasKey $v "additionalNodeTags" }} {{- toYaml $v.tags | nindent 4 }} {{- end }} - managed_by: "karpenter" - - #Instace Volumes + managed_by: karpenter blockDeviceMappings: {{- if hasKey $v "blockDeviceMappings" }} {{- toYaml $v.blockDeviceMappings | nindent 4 }} {{- else }} {{- toYaml $.Values.blockDeviceMappings | nindent 4 }} {{- end }} - {{- if hasKey $v "instanceStorePolicy" }} instanceStorePolicy: {{ $v.instanceStorePolicy }} {{- else if hasKey $.Values "instanceStorePolicy" }} instanceStorePolicy: {{ $.Values.instanceStorePolicy }} {{- end }} - - #Instance Metadata metadataOptions: httpEndpoint: {{ $v.httpEndpoint | default $.Values.httpEndpoint }} httpProtocolIPv6: {{ $v.httpProtocolIPv6 | default $.Values.httpProtocolIPv6 }} diff --git a/charts/karpenter_nodes/templates/nodepool.yaml b/charts/karpenter_nodes/templates/nodepool.yaml index 4bd8629..476dbf8 100644 --- a/charts/karpenter_nodes/templates/nodepool.yaml +++ b/charts/karpenter_nodes/templates/nodepool.yaml @@ -1,7 +1,6 @@ {{- range $k, $v := $.Values.nodeGroups }} --- apiVersion: karpenter.k8s.aws/{{ $.Values.ApiVersion }} -kind: EC2NodeClass kind: NodePool metadata: name: "{{ $k }}-{{ $v.instances.architecture | default $.Values.instances.architecture }}" @@ -29,10 +28,9 @@ spec: {{- else }} name: {{ $k }}-{{ $v.instances.architecture | default $.Values.instances.architecture }} {{- end }} - {{- if or ($v.additionalTaints) (eq ($v.nodeLabelTaint | default $.Values.nodeLabelTaint) "true") (eq ($v.instances.architecture | default $.Values.instances.architecture) "arm64") }} - {{- if haskey $v.taints }} + {{- if hasKey $v "taints" }} taints: - {{- range $v.$taints }} + {{- range $v.taints }} - key: {{ .key }} value: {{ .value }} effect: {{ .effect }} @@ -59,11 +57,10 @@ spec: {{- range $v.instances.cores | default $.Values.instances.cores }} - {{ . 
| quote }} {{- end }} - {{- if not $v.instances.noMinimumGeneration }} - key: karpenter.k8s.aws/instance-generation operator: Gt - values: {{ sub ($v.instances.minGeneration | default $.Values.instances.minGeneration) 1 | quote | list }} - {{- end }} + values: + - {{ sub ($v.instances.minGeneration | default $.Values.instances.minGeneration) 1 | quote }} - key: "topology.kubernetes.io/zone" operator: In values: @@ -96,7 +93,7 @@ spec: operator: NotIn values: {{- if hasKey $v "excludeFamilies" }} - {{- range $v .excludeFamilies } + {{- range $v.excludeFamilies }} - {{ . }} {{- end }} {{- else }} @@ -144,74 +141,77 @@ spec: {{- if hasKey $v.instances "instanceTypes" }} - key: node.kubernetes.io/instance-type operator: In - values: {{- range $v.instances.instanceTypes }} + values: + {{- range $v.instances.instanceTypes }} - {{ . -}} {{- end }} {{- end }} kubelet: - {{- if or (hasKey $v.kubelet.clusterDNS) (hasKey $.Values.kubelet.clusterDNS) }} - clusterDNS: - {{- if hasKey $v.kubelet.clusterDNS }} - {{- range $v.kubelet.clusterDNS }} - {{ . }} - {{- end }} - {{- else }} - {{- range $.Values.kubelet.clusterDNS }} - {{ . }} - {{- end }} - {{- end }} + {{- if or (hasKey $v "kubeletClusterDNS") (hasKey $.Values "kubeletClusterDNS") }} + clusterDNS: + {{- if hasKey $v "kubeletClusterDNS" }} + {{- range $v.kubeletClusterDNS }} + - {{ . }} {{- end }} + {{- else }} + {{- range $.Values.kubeletClusterDNS }} + - {{ . 
}} + {{- end }} + {{- end }} + {{- end }} systemReserved: - cpu: {{ $v.kubelet.systemReserved.cpu | default $.Values.kubelet.systemReserved.cpu }} - memory: {{ $v.kubelet.systemReserved.memory | default $.Values.kubelet.systemReserved.memory }} - ephemeral-storage: {{ $v.kubelet.systemReserved.ephemeralStorage | default $.Values.kubelet.systemReserved.ephemeralStorage }} + cpu: {{ $v.kubeletSystemReservedCpu | default $.Values.kubeletSystemReservedCpu }} + memory: {{ $v.kubeletSystemReservedMemory | default $.Values.kubeletSystemReservedMemory }} + ephemeral-storage: {{ $v.kubeletSystemReservedEphemeralStorage | default $.Values.kubeletSystemReservedEphemeralStorage }} kubeReserved: - cpu: {{ $v.kubelet.kubeReserved.cpu | default $.Values.kubelet.kubeReserved.cpu }} - memory: {{ $v.kubelet.kubeReserved.memory | default $.Values.kubelet.kubeReserved.memory }} - ephemeral-storage: {{ $v.kubelet.kubeReserved.ephemeralStorage | default $.Values.kubelet.kubeReserved.ephemeralStorage }} + cpu: {{ $v.kubeletKubeReservedCpu | default $.Values.kubeletKubeReservedCpu }} + memory: {{ $v.kubeletKubeReservedMemory | default $.Values.kubeletKubeReservedMemory }} + ephemeral-storage: {{ $v.kubeletKubeReservedEphemeralStorage | default $.Values.kubeletKubeReservedEphemeralStorage }} evictionHard: - memory.available: {{ $v.kubelet.evictionHard.memory.available | default $.Values.kubelet.evictionHard.memory.available }} - nodefs.available: {{ $v.kubelet.evictionHard.nodefs.available | default $.Values.kubelet.evictionHard.nodefs.available }} - nodefs.inodesFree: {{ $v.kubelet.evictionHard.nodefs.inodesFree | default $.Values.kubelet.evictionHard.nodefs.inodesFree }} + memory.available: {{ $v.kubeletEvictionHardMemoryAvailable | default $.Values.kubeletEvictionHardMemoryAvailable }} + nodefs.available: {{ $v.kubeletEvictionHardNodefsAvailable | default $.Values.kubeletEvictionHardNodefsAvailable }} + nodefs.inodesFree: {{ $v.kubeletEvictionHardNodefsInodesFree | default 
$.Values.kubeletEvictionHardNodefsInodesFree }} evictionSoft: - memory.available: {{ $v.kubelet.evictionSoft.memory.available | default $.Values.kubelet.evictionSoft.memory.available }} - nodefs.available: {{ $v.kubelet.evictionSoft.nodefs.available | default $.Values.kubelet.evictionSoft.nodefs.available }} - nodefs.inodesFree: {{ $v.kubelet.evictionSoft.nodefs.inodesFree | default $.Values.kubelet.evictionSoft.nodefs.inodesFree }} - imagefs.available: {{ $v.kubelet.evictionSoft.imagefs.available | default $.Values.kubelet.evictionSoft.imagefs.available }} - imagefs.inodesFree: {{ $v.kubelet.evictionSoft.imagefs.inodesFree | default $.Values.kubelet.evictionSoft.imagefs.inodesFree }} - pid.available: {{ $v.kubelet.evictionSoft.pid.available | default $.Values.kubelet.evictionSoft.pid.available }} + memory.available: {{ $v.kubeletEvictionSoftMemoryAvailable | default $.Values.kubeletEvictionSoftMemoryAvailable }} + nodefs.available: {{ $v.kubeletEvictionSoftNodefsAvailable | default $.Values.kubeletEvictionSoftNodefsAvailable }} + nodefs.inodesFree: {{ $v.kubeletEvictionSoftNodefsInodesFree | default $.Values.kubeletEvictionSoftNodefsInodesFree }} + imagefs.available: {{ $v.kubeletEvictionSoftImagefsAvailable | default $.Values.kubeletEvictionSoftImagefsAvailable }} + imagefs.inodesFree: {{ $v.kubeletEvictionSoftImagefsInodesFree | default $.Values.kubeletEvictionSoftImagefsInodesFree }} + pid.available: {{ $v.kubeletEvictionSoftPidAvailable | default $.Values.kubeletEvictionSoftPidAvailable }} evictionSoftGracePeriod: - imagefs.available: {{ $v.kubelet.evictionSoftGracePeriod.imagefs.available | default $.Values.kubelet.evictionSoftGracePeriod.imagefs.available }} - imagefs.inodesFree: {{ $v.kubelet.evictionSoftGracePeriod.imagefs.inodesFree | default $.Values.kubelet.evictionSoftGracePeriod.imagefs.inodesFree }} - memory.available: {{ $v.kubelet.evictionSoftGracePeriod.memory.available | default $.Values.kubelet.evictionSoftGracePeriod.memory.available }} - 
nodefs.available: {{ $v.kubelet.evictionSoftGracePeriod.nodefs.available | default $.Values.kubelet.evictionSoftGracePeriod.nodefs.available }} - nodefs.inodesFree: {{ $v.kubelet.evictionSoftGracePeriod.nodefs.inodesFree | default $.Values.kubelet.evictionSoftGracePeriod.nodefs.inodesFree }} - pid.available: {{ $v.kubelet.evictionSoftGracePeriod.pid.available | default $.Values.kubelet.evictionSoftGracePeriod.pid.available }} - {{- if or (hasKey $v.kubelet.imageGCHighThresholdPercent) (hasKey $.Values.kubelet.imageGCHighThresholdPercent) }} - imageGCHighThresholdPercent: {{ $v.kubelet.imageGCHighThresholdPercent | default $.Values.kubelet.imageGCHighThresholdPercent }} + imagefs.available: {{ $v.kubeletEvictionSoftGracePeriodImagefsAvailable | default $.Values.kubeletEvictionSoftGracePeriodImagefsAvailable }} + imagefs.inodesFree: {{ $v.kubeletEvictionSoftGracePeriodImagefsInodesFree | default $.Values.kubeletEvictionSoftGracePeriodImagefsInodesFree }} + memory.available: {{ $v.kubeletEvictionSoftGracePeriodMemoryAvailable | default $.Values.kubeletEvictionSoftGracePeriodMemoryAvailable }} + nodefs.available: {{ $v.kubeletEvictionSoftGracePeriodNodefsAvailable | default $.Values.kubeletEvictionSoftGracePeriodNodefsAvailable }} + nodefs.inodesFree: {{ $v.kubeletEvictionSoftGracePeriodNodefsInodesFree | default $.Values.kubeletEvictionSoftGracePeriodNodefsInodesFree }} + pid.available: {{ $v.kubeletEvictionSoftGracePeriodPidAvailable | default $.Values.kubeletEvictionSoftGracePeriodPidAvailable }} + {{- if or (hasKey $v "kubeletImageGCHighThresholdPercent") (hasKey $.Values "kubeletImageGCHighThresholdPercent") }} + imageGCHighThresholdPercent: {{ $v.kubeletImageGCHighThresholdPercent | default $.Values.kubeletImageGCHighThresholdPercent }} {{- end }} - {{- if or (hasKey $v.kubelet.imageGCLowThresholdPercent) (hasKey $.Values.kubelet.imageGCLowThresholdPercent) }} - imageGCLowThresholdPercent: {{ $v.kubelet.imageGCLowThresholdPercent | default 
$.Values.kubelet.imageGCLowThresholdPercent }} + {{- if or (hasKey $v "kubeletImageGCLowThresholdPercent") (hasKey $.Values "kubeletImageGCLowThresholdPercent") }} + imageGCLowThresholdPercent: {{ $v.kubeletImageGCLowThresholdPercent | default $.Values.kubeletImageGCLowThresholdPercent }} {{- end }} - {{- if or (hasKey $v.kubelet.imageMinimumGCAge) (hasKey $.Values.kubelet.imageMinimumGCAge) }} - cpuCFSQuota: {{ $v.kubelet.cpuCFSQuota | default $.Values.kubelet.cpuCFSQuota }} + {{- if or (hasKey $v "kubeletImageMinimumGCAge") (hasKey $.Values "kubeletImageMinimumGCAge") }} + imageMinimumGCAge: {{ $v.kubeletImageMinimumGCAge | default $.Values.kubeletImageMinimumGCAge }} {{- end }} - {{- if or (hasKey $v.kubelet.cpuCFSQuota) (hasKey $.Values.kubelet.cpuCFSQuota) }} - podsPerCore: {{ $v.kubelet.podsPerCore | default $.Values.kubelet.podsPerCore }} + {{- if or (hasKey $v "kubeletCpuCFSQuota") (hasKey $.Values "kubeletCpuCFSQuota") }} + cpuCFSQuota: {{ $v.kubeletCpuCFSQuota | default $.Values.kubeletCpuCFSQuota }} {{- end }} - {{- if or (hasKey $v.kubelet.podsPerCore) (hasKey $.Values.kubelet.podsPerCore) }} - maxPods: {{ $v.kubelet.maxPods | default $.Values.kubelet.maxPods }} + {{- if or (hasKey $v "kubeletPodsPerCore") (hasKey $.Values "kubeletPodsPerCore") }} + podsPerCore: {{ $v.kubeletPodsPerCore | default $.Values.kubeletPodsPerCore }} + {{- end }} + {{- if or (hasKey $v "kubeletMaxPods") (hasKey $.Values "kubeletMaxPods") }} + maxPods: {{ $v.kubeletMaxPods | default $.Values.kubeletMaxPods }} {{- end }} disruption: expireAfter: {{ $v.expireAfter | default $.Values.expireAfter }} consolidationPolicy: {{$v.consolidationPolicy | default $.Values.consolidationPolicy}} consolidateAfter: {{ $v.consolidateAfter | default $.Values.consolidateAfter }} - {{- end }} {{- if $v.budgets }} budgets: {{- $v.budgets | toYaml | nindent 6 }} {{- end }} - {{- if haskey $v.limits }} + {{- if hasKey $v "limits" }} limits: {{- range $limitName, $limitValue := $v.limits }} {{ 
$limitName }}: {{ $limitValue }} diff --git a/charts/karpenter_nodes/templates/priorityclass.yaml b/charts/karpenter_nodes/templates/priorityclass.yaml new file mode 100644 index 0000000..f3b75b7 --- /dev/null +++ b/charts/karpenter_nodes/templates/priorityclass.yaml @@ -0,0 +1,9 @@ +{{- if .Values.headRoom -}} +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: karpenter-headroom +value: -1000000 +globalDefault: false +description: "Used for dummy pods to generate headroom in karpenter" +{{- end -}} diff --git a/charts/karpenter_nodes/tests/nodeclass_test.yaml b/charts/karpenter_nodes/tests/nodeclass_test.yaml new file mode 100644 index 0000000..683fac4 --- /dev/null +++ b/charts/karpenter_nodes/tests/nodeclass_test.yaml @@ -0,0 +1,114 @@ +suite: test nodeclasses +templates: + - nodeclass.yaml + +values: + - values.yaml +tests: + - it: Verify nodes-default + documentIndex: 0 + asserts: + - isKind: + of: EC2NodeClass + - equal: + path: metadata.name + value: nodes-default-amd64 + - equal: + path: spec.role + value: eks_nodes_role + - isNull: + path: spec.instanceProfile + - equal: + path: spec.amiFamily + value: AL2 + - equal: + path: spec.subnetSelectorTerms[0].tags.Name + value: eks-subnet-1 + - equal: + path: spec.securityGroupSelectorTerms[1].tags.Name + value: my-security-group-2 + - equal: + path: spec.tags.nodegroup + value: nodes-default + - equal: + path: spec.tags.component + value: eks-karpenter-nodes + - equal: + path: spec.tags.cluster + value: eks-dev + - equal: + path: spec.blockDeviceMappings[0].deviceName + value: /dev/xvda + - equal: + path: spec.blockDeviceMappings[0].ebs.volumeSize + value: 100Gi + - isNull: + path: spec.instanceStorePolicy + - equal: + path: spec.metadataOptions.httpTokens + value: required + - equal: + path: spec.metadataOptions.httpEndpoint + value: enabled + - equal: + path: spec.metadataOptions.httpProtocolIPv6 + value: disabled + - equal: + path: spec.metadataOptions.httpPutResponseHopLimit + value: 2 
+ + - it: Verify nodes-workers + documentIndex: 1 + asserts: + - isKind: + of: EC2NodeClass + - equal: + path: metadata.name + value: nodes-workers-arm64 + - isNull: + path: spec.role + - equal: + path: spec.instanceProfile + value: arn:aws:blablablabla + - equal: + path: spec.amiFamily + value: BottleRocket + - equal: + path: spec.subnetSelectorTerms[0].tags.Name + value: eks-subnet-workers-1 + - equal: + path: spec.securityGroupSelectorTerms[1].tags.Name + value: my-security-group-workers-2 + - equal: + path: spec.tags.nodegroup + value: nodes-workers + - equal: + path: spec.tags.component + value: eks-karpenter-nodes + - equal: + path: spec.tags.cluster + value: eks-dev + - equal: + path: spec.blockDeviceMappings[0].deviceName + value: /dev/xvda + - equal: + path: spec.blockDeviceMappings[0].ebs.volumeSize + value: 150Gi + - equal: + path: spec.instanceStorePolicy + value: test + - equal: + path: spec.metadataOptions.httpTokens + value: required + - equal: + path: spec.metadataOptions.httpEndpoint + value: enabled + - equal: + path: spec.metadataOptions.httpProtocolIPv6 + value: disabled + - equal: + path: spec.metadataOptions.httpPutResponseHopLimit + value: 2 + + + diff --git a/charts/karpenter_nodes/tests/nodepool_nodes_default_test.yaml b/charts/karpenter_nodes/tests/nodepool_nodes_default_test.yaml new file mode 100644 index 0000000..cb17749 --- /dev/null +++ b/charts/karpenter_nodes/tests/nodepool_nodes_default_test.yaml @@ -0,0 +1,142 @@ +suite: test nodepools +templates: + - nodepool.yaml + +values: + - values.yaml + +tests: + - it: Verify nodes-default metadata + documentIndex: 0 + asserts: + - isKind: + of: NodePool + - equal: + path: metadata.name + value: nodes-default-amd64 + - equal: + path: spec.template.metadata.labels.cluster + value: eks-dev + - equal: + path: spec.template.metadata.labels.nodegroup + value: nodes-default + - equal: + path: spec.template.metadata.labels.testlabel1 + value: label1 + - equal: + path: 
spec.template.metadata.labels.testlabel2 + value: label2 + - equal: + path: spec.template.spec.nodeClassRef.name + value: nodes-default-amd64 + - equal: + path: spec.template.spec.taints[0].key + value: testtaint1 + - equal: + path: spec.template.spec.taints[0].value + value: taint1 + - equal: + path: spec.template.spec.taints[0].effect + value: noSchedule + - equal: + path: spec.template.spec.taints[1].key + value: testtaint2 + - equal: + path: spec.template.spec.taints[1].value + value: taint2 + - equal: + path: spec.template.spec.taints[1].effect + value: noSchedule + + + - it: Verify nodes-default requirements + documentIndex: 0 + asserts: + # instance-category + - notContains: + path: spec.template.spec.requirements[0].values + content: t + - equal: + path: spec.template.spec.requirements[0].values[0] + value: m + - equal: + path: spec.template.spec.requirements[0].values[2] + value: c + # instance-cpu + - equal: + path: spec.template.spec.requirements[1].values[0] + value: "4" + - equal: + path: spec.template.spec.requirements[1].values[2] + value: "16" + # instance-generation + - equal: + path: spec.template.spec.requirements[2].operator + value: "Gt" + - equal: + path: spec.template.spec.requirements[2].values[0] + value: "6" + # instance-zone + - equal: + path: spec.template.spec.requirements[3].values[0] + value: "eu-west-1a" + - equal: + path: spec.template.spec.requirements[3].values[2] + value: "eu-west-1c" + # instance-architecture + - equal: + path: spec.template.spec.requirements[4].values[0] + value: "amd64" + # instance-capacity-type + - equal: + path: spec.template.spec.requirements[5].values[0] + value: "spot" + - equal: + path: spec.template.spec.requirements[5].values[1] + value: "on-demand" + # instance-OS + - equal: + path: spec.template.spec.requirements[6].values[0] + value: "linux" + # instance-family Exclusions + - equal: + path: spec.template.spec.requirements[7].values[0] + value: "c6a" + # instance-size Exclusiong + - equal: + path: 
spec.template.spec.requirements[8].operator + value: NotIn + - equal: + path: spec.template.spec.requirements[8].values[0] + value: metal + + - it: Verify nodes-default kubelet + documentIndex: 0 + asserts: + - equal: + path: spec.template.spec.kubelet.systemReserved.cpu + value: 250m + - equal: + path: spec.template.spec.kubelet.evictionHard.memory\.available + value: 768Mi + + - it: Verify nodes-default Options + documentIndex: 0 + asserts: + - equal: + path: spec.disruption.expireAfter + value: 720h + - equal: + path: spec.disruption.consolidationPolicy + value: WhenUnderutilized + - equal: + path: spec.disruption.consolidateAfter + value: 5m + - isNull: + path: spec.budgets + - isNull: + path: spec.limits + - equal: + path: spec.weight + value: 1 + diff --git a/charts/karpenter_nodes/tests/nodepool_nodes_workers_test.yaml b/charts/karpenter_nodes/tests/nodepool_nodes_workers_test.yaml new file mode 100644 index 0000000..26a123c --- /dev/null +++ b/charts/karpenter_nodes/tests/nodepool_nodes_workers_test.yaml @@ -0,0 +1,141 @@ +suite: test nodepools +templates: + - nodepool.yaml + +values: + - values.yaml + +tests: + - it: Verify nodes-workers metadata + documentIndex: 1 + asserts: + - isKind: + of: NodePool + - equal: + path: metadata.name + value: nodes-workers-arm64 + - equal: + path: spec.template.metadata.labels.cluster + value: eks-dev + - equal: + path: spec.template.metadata.labels.nodegroup + value: nodes-workers + - isNull: + path: spec.template.metadata.labels.testlabel1 + - equal: + path: spec.template.spec.nodeClassRef.name + value: nodes-workers-arm64 + - equal: + path: spec.template.spec.startupTaints[0].key + value: testtaint1 + - equal: + path: spec.template.spec.startupTaints[0].value + value: taint1 + - equal: + path: spec.template.spec.startupTaints[0].effect + value: noSchedule + - equal: + path: spec.template.spec.startupTaints[1].key + value: testtaint2 + - equal: + path: spec.template.spec.startupTaints[1].value + value: taint2 + - equal: 
+ path: spec.template.spec.startupTaints[1].effect + value: noSchedule + + - it: Verify nodes-workers requirements + documentIndex: 1 + asserts: + # instance-category + - notContains: + path: spec.template.spec.requirements[0].values + content: m + - equal: + path: spec.template.spec.requirements[0].values[0] + value: t + - equal: + path: spec.template.spec.requirements[0].values[1] + value: x + # instance-cpu + - equal: + path: spec.template.spec.requirements[1].values[0] + value: "2" + - equal: + path: spec.template.spec.requirements[1].values[1] + value: "6" + # instance-generation + - equal: + path: spec.template.spec.requirements[2].operator + value: "Gt" + - equal: + path: spec.template.spec.requirements[2].values[0] + value: "4" + # instance-zone + - equal: + path: spec.template.spec.requirements[3].values[0] + value: "eu-west-1g" + # instance-architecture + - equal: + path: spec.template.spec.requirements[4].values[0] + value: "arm64" + # instance-capacity-type + - equal: + path: spec.template.spec.requirements[5].values[0] + value: "on-demand" + # instance-OS + - equal: + path: spec.template.spec.requirements[6].values[0] + value: "linux" + # instance-family Exclusions + - equal: + path: spec.template.spec.requirements[7].values[0] + value: "m6a" + # instance-size Exclusiong + - equal: + path: spec.template.spec.requirements[8].operator + value: NotIn + - equal: + path: spec.template.spec.requirements[8].values[0] + value: metal + + - it: Verify nodes-workers kubelet + documentIndex: 1 + asserts: + - equal: + path: spec.template.spec.kubelet.systemReserved.cpu + value: 750m + - equal: + path: spec.template.spec.kubelet.evictionHard.memory\.available + value: 768Mi + - equal: + path: spec.template.spec.kubelet.clusterDNS[0] + value: "1.1.1.1" + - equal: + path: spec.template.spec.kubelet.clusterDNS[1] + value: "2.2.2.2" + + - it: Verify nodes-workers Options + documentIndex: 1 + asserts: + - equal: + path: spec.disruption.expireAfter + value: 720h + - 
equal: + path: spec.disruption.consolidationPolicy + value: WhenEmpty + - equal: + path: spec.disruption.consolidateAfter + value: 10m + - isNull: + path: spec.budgets + - equal: + path: spec.limits.cpu + value: 100 + - equal: + path: spec.limits.memory + value: "384Gi" + - equal: + path: spec.weight + value: 3 + diff --git a/charts/karpenter_nodes/tests/priorityclass_test.yaml b/charts/karpenter_nodes/tests/priorityclass_test.yaml new file mode 100644 index 0000000..f78d3ff --- /dev/null +++ b/charts/karpenter_nodes/tests/priorityclass_test.yaml @@ -0,0 +1,21 @@ +suite: test nodeclasses +templates: + - priorityclass.yaml + +values: + - values.yaml +tests: + - it: Verify priorityclass + documentIndex: 0 + asserts: + - isKind: + of: PriorityClass + - equal: + path: metadata.name + value: karpenter-headroom + - equal: + path: value + value: -1000000 + - equal: + path: globalDefault + value: false diff --git a/charts/karpenter_nodes/tests/values.yaml b/charts/karpenter_nodes/tests/values.yaml new file mode 100644 index 0000000..e70506b --- /dev/null +++ b/charts/karpenter_nodes/tests/values.yaml @@ -0,0 +1,130 @@ +#Nodegroups and customizeable overwrites +nodeGroups: + nodes-default: + instances: + minGeneration: 7 + budget: + - nodes: "2" + headRoom: + - size: small + count: 2 + antiAffinitySpec: + - key: testlabel1 + operator: Exists + + labels: + testlabel1: label1 + testlabel2: label2 + taints: + - key: testtaint1 + effect: noSchedule + value: taint1 + - key: testtaint2 + effect: noSchedule + value: taint2 + excludeFamilies: + - c6a + + nodes-workers: + weight: 3 + amiFamily: BottleRocket + consolidationPolicy: "WhenEmpty" + consolidateAfter: "10m" + blockDeviceMappings: + - deviceName: /dev/xvda + ebs: + volumeSize: 150Gi + volumeType: gp3 + encrypted: false + deleteOnTermination: true + iops: 3000 + throughput: 125 + IamInstanceProfile: "arn:aws:blablablabla" + excludeFamilies: + - m6a + headRoom: + - size: xlarge + count: 4 + antiAffinitySpec: + - key: 
testlabel2 + operator: In + values: + - value1 + - value2 + nameSpaces: + - default + - kube-system + + instances: + architecture: "arm64" + minGeneration: 5 + categories: + - t + - x + cores: + - "2" + - "6" + capacityType: + - on-demand + subnetSelectorTerms: + - tags: + Name: "eks-subnet-workers-1" + - tags: + Name: "eks-subnet-workers-2" + - tags: + Name: "eks-subnet-workers-3" + securityGroupSelectorTerms: + - tags: + Name: "my-security-group-workers-1" + - tags: + Name: "my-security-group-workers-2" + - tags: + Name: "my-security-group-workers-3" + availabilityZones: + - eu-west-1g + instanceStorePolicy: "test" + kubeletSystemReservedCpu: 750m + kubeletClusterDNS: + - "1.1.1.1" + - "2.2.2.2" + userdata: | + echo "Nodepool name is {{ default .key }}" + capacitySpread: + start: 1 + end: 5 + limits: + cpu: "100" + memory: "384Gi" + startupTaints: + - key: testtaint1 + effect: noSchedule + value: taint1 + - key: testtaint2 + effect: noSchedule + value: taint2 + +#Default cluster Settings +clusterName: "eks-dev" +subnetSelectorTerms: + - tags: + Name: "eks-subnet-1" + - tags: + Name: "eks-subnet-2" + - tags: + Name: "eks-subnet-3" + +securityGroupSelectorTerms: + - tags: + Name: "my-security-group-1" + - tags: + Name: "my-security-group-2" + - tags: + Name: "my-security-group-3" + +availabilityZones: + - eu-west-1a + - eu-west-1b + - eu-west-1c + +excludeInstanceSize: + - metal diff --git a/charts/karpenter_nodes/userdata_example_values.yaml b/charts/karpenter_nodes/userdata_example_values.yaml index 740ffff..6b5a06b 100644 --- a/charts/karpenter_nodes/userdata_example_values.yaml +++ b/charts/karpenter_nodes/userdata_example_values.yaml @@ -1,3 +1,10 @@ +registry: "registry-1.docker.io" +registryCache: true +registryHost: "" #"http://registry" +registrySkipVerify: true +disableIPv6: true + + userData: | CLUSTER_NAME={{ $.Values.clusterName }} INSTANCEGROUP={{ .value.nodeGroupLabel | default .key }} diff --git a/charts/karpenter_nodes/values.yaml 
b/charts/karpenter_nodes/values.yaml index fe61fa6..e2703b9 100644 --- a/charts/karpenter_nodes/values.yaml +++ b/charts/karpenter_nodes/values.yaml @@ -1,11 +1,12 @@ - ##Global Configuration + #Karpenter API Version in CRD ApiVersion: v1beta1 -clusterName: "" #My Cluster Name -amiFamily: AL2 #Bottlerocket #AL2023 -IamRole: "" #eks_nodes_role +#Nodes Configuration +clusterName: "eks-cluster" #My Cluster Name +amiFamily: AL2 #Bottlerocket #AL2023, Can be overridden by amiSelectorTerms +IamRole: eks_nodes_role subnetSelectorTerms: [] # - tags: @@ -31,8 +32,6 @@ securityGroupSelectorTerms: [] # Name: "my-security-group-2" nodeGroupLabelName: nodegroup -operatingSystems: - - linux nodeTags: team: devops @@ -57,11 +56,7 @@ blockDeviceMappings: iops: 3000 throughput: 125 -registry: "registry-1.docker.io" -registryCache: true -registryHost: "" #"http://registry" -registrySkipVerify: true -disableIPv6: true +#Consolidation Options expireAfter: "720h" consolidationPolicy: "WhenUnderutilized" consolidateAfter: "5m" @@ -81,32 +76,60 @@ instances: capacityType: - spot - on-demand + operatingSystems: + - linux + +##Exclude weak types of AMD instances +#excludeFamilies: +# - c6a +# - m6a +# - r6a +# - c5a +# - m5a +# - r5a +# - c6ad +# - m6ad +# - r6ad +# - m5ad +# - r5ad +# - r5ad + +##Exclude Expensive types like Metal +#excludeInstanceSize: +# - metal + +#Kubelet Configuration paramemters +#kubeletClusterDNS: [] +kubeletSystemReservedCpu: 250m +kubeletSystemReservedMemory: 200Mi +kubeletSystemReservedEphemeralStorage: 2Gi +kubeletKubeReservedCpu: 250m +kubeletKubeReservedMemory: 1Gi +kubeletKubeReservedEphemeralStorage: 4Gi +kubeletEvictionHardMemoryAvailable: 768Mi +kubeletEvictionHardNodefsAvailable: 8% +kubeletEvictionHardNodefsInodesFree: 8% +kubeletEvictionSoftMemoryAvailable: 1280Mi +kubeletEvictionSoftNodefsAvailable: 10% +kubeletEvictionSoftNodefsInodesFree: 15% +kubeletEvictionSoftImagefsAvailable: 10% +kubeletEvictionSoftImagefsInodesFree: 10% 
+kubeletEvictionSoftPidAvailable: 10% +kubeletEvictionSoftGracePeriodImagefsAvailable: 10m0s +kubeletEvictionSoftGracePeriodImagefsInodesFree: 10m0s +kubeletEvictionSoftGracePeriodMemoryAvailable: 5m0s +kubeletEvictionSoftGracePeriodNodefsAvailable: 10m0s +kubeletEvictionSoftGracePeriodNodefsInodesFree: 10m0s +kubeletEvictionSoftGracePeriodPidAvailable: 2m0s +#kubeletImageGCHighThresholdPercent: 85 +#kubeletImageGCLowThresholdPercent: 80 +#kubeletImageMinimumGCAge: 2m0s +#kubeletCpuCFSQuota: true +#kubeletPodsPerCore: 5 +#kubeletMaxPods: 110 +## Create Low Priority Class For Generating Headroom +headRoom: true -kubelet: - systemReserved: - cpu: 250m - memory: 200Mi - ephemeral-storage: 2Gi - kubeReserved: - cpu: 250m - memory: 1Gi - ephemeral-storage: 4Gi - evictionHard: - memory.available: 768Mi - nodefs.available: 8% - nodefs.inodesFree: 8% - evictionSoft: - memory.available: 1280Mi - nodefs.available: 10% - nodefs.inodesFree: 15% - imagefs.available: 10% - imagefs.inodesFree: 10% - pid.available: 10% - evictionSoftGracePeriod: - imagefs.available: 10m0s - imagefs.inodesFree: 10m0s - memory.available: 5m0s - nodefs.available: 10m0s - nodefs.inodesFree: 10m0s - pid.available: 2m0s +#PlaceHolder fo NodeGroups +nodeGroups: {} diff --git a/robots.txt b/robots.txt new file mode 100644 index 0000000..1f53798 --- /dev/null +++ b/robots.txt @@ -0,0 +1,2 @@ +User-agent: * +Disallow: / From fb1e7006852dc0df07d421c88178371d509b5e30 Mon Sep 17 00:00:00 2001 From: nadavbuc Date: Mon, 8 Apr 2024 13:13:06 +0300 Subject: [PATCH 03/26] Ready for CI Phase --- .../nodegroups_example_values.yaml | 3 + .../tests/headroom_nodes_default_test.yaml | 61 ++++++++++++++++ .../tests/headroom_nodes_workers_test.yaml | 73 +++++++++++++++++++ .../tests/nodepool_nodes_default_test.yaml | 2 +- .../tests/nodepool_nodes_workers_test.yaml | 2 +- .../tests/priorityclass_test.yaml | 2 +- charts/karpenter_nodes/tests/values.yaml | 1 - 7 files changed, 140 insertions(+), 4 deletions(-) create mode 
100644 charts/karpenter_nodes/tests/headroom_nodes_default_test.yaml create mode 100644 charts/karpenter_nodes/tests/headroom_nodes_workers_test.yaml diff --git a/charts/karpenter_nodes/nodegroups_example_values.yaml b/charts/karpenter_nodes/nodegroups_example_values.yaml index 8c5ae51..d60cf2a 100644 --- a/charts/karpenter_nodes/nodegroups_example_values.yaml +++ b/charts/karpenter_nodes/nodegroups_example_values.yaml @@ -29,6 +29,9 @@ nodeGroups: nodeHeadRooms: - size: small count: 2 + antiAffinitySpec: + - key: testlabel1 + operator: Exists labels: testlabel1: label1 testlabel2: label2 diff --git a/charts/karpenter_nodes/tests/headroom_nodes_default_test.yaml b/charts/karpenter_nodes/tests/headroom_nodes_default_test.yaml new file mode 100644 index 0000000..1e45176 --- /dev/null +++ b/charts/karpenter_nodes/tests/headroom_nodes_default_test.yaml @@ -0,0 +1,61 @@ +suite: test headroom nodes-default +templates: + - headroom.yaml + +values: + - values.yaml + +tests: + - it: Verify nodes-default metadata + documentIndex: 0 + asserts: + - isKind: + of: Deployment + - equal: + path: metadata.name + value: headroom-nodes-default-amd64-small + - equal: + path: metadata.labels.k8s-app + value: headroom-nodes-default-amd64-small + + - it: Verify nodes-default affinity + documentIndex: 0 + asserts: + - equal: + path: spec.template.spec.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].key + value: karpenter.sh/nodepool + - equal: + path: spec.template.spec.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].operator + value: In + - equal: + path: spec.template.spec.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].values[0] + value: nodes-default-amd64 + - equal: + path: 
spec.template.spec.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].labelSelector.matchExpressions[0].key + value: k8s-app + - equal: + path: spec.template.spec.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].labelSelector.matchExpressions[0].operator + value: In + - equal: + path: spec.template.spec.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].labelSelector.matchExpressions[0].values[0] + value: headroom-nodes-default-amd64-small + - equal: + path: spec.template.spec.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].labelSelector.matchExpressions[1].key + value: testlabel1 + - equal: + path: spec.template.spec.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].labelSelector.matchExpressions[1].operator + value: Exists + - equal: + path: spec.template.spec.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].namespaces + value: {} + + - it: Verify nodes-default Requests + documentIndex: 0 + asserts: + - equal: + path: spec.template.spec.containers[0].resources.requests.cpu + value: "1" + - equal: + path: spec.template.spec.containers[0].resources.requests.memory + value: 4Gi + diff --git a/charts/karpenter_nodes/tests/headroom_nodes_workers_test.yaml b/charts/karpenter_nodes/tests/headroom_nodes_workers_test.yaml new file mode 100644 index 0000000..bea2ff0 --- /dev/null +++ b/charts/karpenter_nodes/tests/headroom_nodes_workers_test.yaml @@ -0,0 +1,73 @@ +suite: test headroom nodes-workers +templates: + - headroom.yaml + +values: + - values.yaml + +tests: + - it: Verify nodes-workers metadata + documentIndex: 1 + asserts: + - isKind: + of: Deployment + - equal: + path: metadata.name + value: headroom-nodes-workers-arm64-xlarge + - equal: + path: metadata.labels.k8s-app + value: headroom-nodes-workers-arm64-xlarge + + - it: Verify nodes-workers affinity + documentIndex: 1 + asserts: + - equal: + path: 
spec.template.spec.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].key + value: karpenter.sh/nodepool + - equal: + path: spec.template.spec.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].operator + value: In + - equal: + path: spec.template.spec.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].values[0] + value: nodes-workers-arm64 + - equal: + path: spec.template.spec.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].labelSelector.matchExpressions[0].key + value: k8s-app + - equal: + path: spec.template.spec.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].labelSelector.matchExpressions[0].operator + value: In + - equal: + path: spec.template.spec.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].labelSelector.matchExpressions[0].values[0] + value: headroom-nodes-workers-arm64-xlarge + - equal: + path: spec.template.spec.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].labelSelector.matchExpressions[1].key + value: testlabel2 + - equal: + path: spec.template.spec.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].labelSelector.matchExpressions[1].operator + value: In + - equal: + path: spec.template.spec.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].labelSelector.matchExpressions[1].values[0] + value: value1 + - equal: + path: spec.template.spec.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].labelSelector.matchExpressions[1].values[1] + value: value2 + - equal: + path: spec.template.spec.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].namespaces[0] + value: karpenter + - equal: + path: 
spec.template.spec.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].namespaces[1] + value: default + - equal: + path: spec.template.spec.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].namespaces[2] + value: kube-system + + - it: Verify nodes-workers Requests + documentIndex: 1 + asserts: + - equal: + path: spec.template.spec.containers[0].resources.requests.cpu + value: "8" + - equal: + path: spec.template.spec.containers[0].resources.requests.memory + value: 32Gi + diff --git a/charts/karpenter_nodes/tests/nodepool_nodes_default_test.yaml b/charts/karpenter_nodes/tests/nodepool_nodes_default_test.yaml index cb17749..4284b60 100644 --- a/charts/karpenter_nodes/tests/nodepool_nodes_default_test.yaml +++ b/charts/karpenter_nodes/tests/nodepool_nodes_default_test.yaml @@ -1,4 +1,4 @@ -suite: test nodepools +suite: test nodepool - nodes-default templates: - nodepool.yaml diff --git a/charts/karpenter_nodes/tests/nodepool_nodes_workers_test.yaml b/charts/karpenter_nodes/tests/nodepool_nodes_workers_test.yaml index 26a123c..e4ffb0e 100644 --- a/charts/karpenter_nodes/tests/nodepool_nodes_workers_test.yaml +++ b/charts/karpenter_nodes/tests/nodepool_nodes_workers_test.yaml @@ -1,4 +1,4 @@ -suite: test nodepools +suite: test nodepool - nodes-workers templates: - nodepool.yaml diff --git a/charts/karpenter_nodes/tests/priorityclass_test.yaml b/charts/karpenter_nodes/tests/priorityclass_test.yaml index f78d3ff..e99f836 100644 --- a/charts/karpenter_nodes/tests/priorityclass_test.yaml +++ b/charts/karpenter_nodes/tests/priorityclass_test.yaml @@ -1,4 +1,4 @@ -suite: test nodeclasses +suite: test priorityclass templates: - priorityclass.yaml diff --git a/charts/karpenter_nodes/tests/values.yaml b/charts/karpenter_nodes/tests/values.yaml index e70506b..fc69206 100644 --- a/charts/karpenter_nodes/tests/values.yaml +++ b/charts/karpenter_nodes/tests/values.yaml @@ -11,7 +11,6 @@ nodeGroups: antiAffinitySpec: - key: testlabel1 
operator: Exists - labels: testlabel1: label1 testlabel2: label2 From edb63c4e10360a1c8ad6ad415564af9be880cbfe Mon Sep 17 00:00:00 2001 From: nadavbuc Date: Mon, 8 Apr 2024 13:31:49 +0300 Subject: [PATCH 04/26] finalize tests with userdata --- charts/karpenter_nodes/templates/nodeclass.yaml | 2 +- charts/karpenter_nodes/tests/nodeclass_test.yaml | 4 ++++ charts/karpenter_nodes/tests/values.yaml | 2 +- charts/karpenter_nodes/userdata_example_values.yaml | 4 +++- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/charts/karpenter_nodes/templates/nodeclass.yaml b/charts/karpenter_nodes/templates/nodeclass.yaml index 5af883e..dcf626a 100644 --- a/charts/karpenter_nodes/templates/nodeclass.yaml +++ b/charts/karpenter_nodes/templates/nodeclass.yaml @@ -66,7 +66,7 @@ spec: {{- if or (hasKey $v "userData") (hasKey $.Values "userData") }} userData: | {{- if hasKey $v "userData" }} - {{- tpl $.Values.userData $ | nindent 4 }} + {{- tpl $v.userData $ | nindent 4 }} {{- else if hasKey $.Values "userData" }} {{- tpl $.Values.userData $ | nindent 4 }} {{- end }} diff --git a/charts/karpenter_nodes/tests/nodeclass_test.yaml b/charts/karpenter_nodes/tests/nodeclass_test.yaml index 683fac4..742f92e 100644 --- a/charts/karpenter_nodes/tests/nodeclass_test.yaml +++ b/charts/karpenter_nodes/tests/nodeclass_test.yaml @@ -109,6 +109,10 @@ tests: - equal: path: spec.metadataOptions.httpPutResponseHopLimit value: 2 + - equal: + path: spec.userData + value: | + echo "Nodepool name is nodes-workers" diff --git a/charts/karpenter_nodes/tests/values.yaml b/charts/karpenter_nodes/tests/values.yaml index fc69206..e6515f9 100644 --- a/charts/karpenter_nodes/tests/values.yaml +++ b/charts/karpenter_nodes/tests/values.yaml @@ -86,7 +86,7 @@ nodeGroups: kubeletClusterDNS: - "1.1.1.1" - "2.2.2.2" - userdata: | + userData: | echo "Nodepool name is {{ default .key }}" capacitySpread: start: 1 diff --git a/charts/karpenter_nodes/userdata_example_values.yaml 
b/charts/karpenter_nodes/userdata_example_values.yaml index 6b5a06b..b41dd9a 100644 --- a/charts/karpenter_nodes/userdata_example_values.yaml +++ b/charts/karpenter_nodes/userdata_example_values.yaml @@ -4,7 +4,9 @@ registryHost: "" #"http://registry" registrySkipVerify: true disableIPv6: true - +nodeGroups: + nodes-default: + instances: {} userData: | CLUSTER_NAME={{ $.Values.clusterName }} INSTANCEGROUP={{ .value.nodeGroupLabel | default .key }} From 43bd558ab1017650f7eed71ed5a3472d07b30fff Mon Sep 17 00:00:00 2001 From: nadavbuc Date: Mon, 8 Apr 2024 13:35:55 +0300 Subject: [PATCH 05/26] test --- .github/workflows/test.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .github/workflows/test.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..1852d1e --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,11 @@ +name: CI + +on: pull_request + +jobs: + unittest: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: d3adb5/helm-unittest-action@v2 + - run: helm unittest --helm3 karpenter_nodes From b33dc1148b43c066850b17f53f29546bdceb3c7e Mon Sep 17 00:00:00 2001 From: nadavbuc Date: Mon, 8 Apr 2024 13:41:30 +0300 Subject: [PATCH 06/26] fix tests --- .github/workflows/test.yml | 1 - charts/karpenter_nodes/tests/nodepool_nodes_default_test.yaml | 4 ++-- charts/karpenter_nodes/tests/nodepool_nodes_workers_test.yaml | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1852d1e..f45d23d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -8,4 +8,3 @@ jobs: steps: - uses: actions/checkout@v3 - uses: d3adb5/helm-unittest-action@v2 - - run: helm unittest --helm3 karpenter_nodes diff --git a/charts/karpenter_nodes/tests/nodepool_nodes_default_test.yaml b/charts/karpenter_nodes/tests/nodepool_nodes_default_test.yaml index 4284b60..9d9e838 100644 --- 
a/charts/karpenter_nodes/tests/nodepool_nodes_default_test.yaml +++ b/charts/karpenter_nodes/tests/nodepool_nodes_default_test.yaml @@ -117,8 +117,8 @@ tests: path: spec.template.spec.kubelet.systemReserved.cpu value: 250m - equal: - path: spec.template.spec.kubelet.evictionHard.memory\.available - value: 768Mi + path: spec.template.spec.kubelet.kubeReserved.ephemeral-storage + value: 4Gi - it: Verify nodes-default Options documentIndex: 0 diff --git a/charts/karpenter_nodes/tests/nodepool_nodes_workers_test.yaml b/charts/karpenter_nodes/tests/nodepool_nodes_workers_test.yaml index e4ffb0e..f0f5249 100644 --- a/charts/karpenter_nodes/tests/nodepool_nodes_workers_test.yaml +++ b/charts/karpenter_nodes/tests/nodepool_nodes_workers_test.yaml @@ -106,8 +106,8 @@ tests: path: spec.template.spec.kubelet.systemReserved.cpu value: 750m - equal: - path: spec.template.spec.kubelet.evictionHard.memory\.available - value: 768Mi + path: spec.template.spec.kubelet.kubeReserved.ephemeral-storage + value: 4Gi - equal: path: spec.template.spec.kubelet.clusterDNS[0] value: "1.1.1.1" From c6736f9dd4ccf5643873b3665db1b88a4840615d Mon Sep 17 00:00:00 2001 From: nadavbuc Date: Mon, 8 Apr 2024 13:53:29 +0300 Subject: [PATCH 07/26] yalla full workflow --- .github/workflows/test.yml | 10 --------- .github/workflows/tests.yml | 43 +++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 10 deletions(-) delete mode 100644 .github/workflows/test.yml create mode 100644 .github/workflows/tests.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml deleted file mode 100644 index f45d23d..0000000 --- a/.github/workflows/test.yml +++ /dev/null @@ -1,10 +0,0 @@ -name: CI - -on: pull_request - -jobs: - unittest: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: d3adb5/helm-unittest-action@v2 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..2caf634 --- /dev/null +++ 
b/.github/workflows/tests.yml @@ -0,0 +1,43 @@ +name: Lint and Test + +on: pull_request + +jobs: + lint-and-version-check: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Set up Helm + uses: azure/setup-helm@v3 + with: + version: v3.12.1 + + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + check-latest: true + + - name: Set up chart-testing + uses: helm/chart-testing-action@v2.6.0 + + - name: Run chart-testing (list-changed) + id: list-changed + run: | + changed=$(ct list-changed --target-branch ${{ github.event.repository.default_branch }}) + if [[ -n "$changed" ]]; then + echo "changed=true" >> "$GITHUB_OUTPUT" + fi + + - name: Run chart-testing (lint) + if: steps.list-changed.outputs.changed == 'true' + run: ct lint --target-branch ${{ github.event.repository.default_branch }} + unittest: + needs: lint-and-version-check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: d3adb5/helm-unittest-action@v2 From e1a9825c929f7b41530ffbbfda7d0ee6facf2ce0 Mon Sep 17 00:00:00 2001 From: nadavbuc Date: Mon, 8 Apr 2024 13:54:08 +0300 Subject: [PATCH 08/26] fff --- .github/{PULL_REQUEST_TEMPLATE.md => pull_request_templatea.md} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename .github/{PULL_REQUEST_TEMPLATE.md => pull_request_templatea.md} (75%) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/pull_request_templatea.md similarity index 75% rename from .github/PULL_REQUEST_TEMPLATE.md rename to .github/pull_request_templatea.md index 90ef1ba..79492ca 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/pull_request_templatea.md @@ -4,7 +4,7 @@ _Provide a description of what has been changed_ ### Checklist -- [ ] README is updated with new configuration values *(if applicable)* [learn more](https://github.com/kedacore/charts/blob/main/CONTRIBUTING.md#documentation) +- [ ] README is updated with new configuration values *(if applicable)* - [ ] 
Changes were throughly tested locally - [ ] Changes are covered by Unit Tests - [ ] Version is updated in `Chart.yaml` From b949e2cc12aeffca9611f4454af8d1c7322614b9 Mon Sep 17 00:00:00 2001 From: nadavbuc Date: Mon, 8 Apr 2024 13:54:24 +0300 Subject: [PATCH 09/26] ffff --- .github/{pull_request_templatea.md => pull_request_template.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/{pull_request_templatea.md => pull_request_template.md} (100%) diff --git a/.github/pull_request_templatea.md b/.github/pull_request_template.md similarity index 100% rename from .github/pull_request_templatea.md rename to .github/pull_request_template.md From 40222fdb7e84ff93fdfcaba892ee7f3283238204 Mon Sep 17 00:00:00 2001 From: nadavbuc Date: Mon, 8 Apr 2024 13:55:36 +0300 Subject: [PATCH 10/26] fixes --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 2caf634..905c634 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -36,7 +36,7 @@ jobs: if: steps.list-changed.outputs.changed == 'true' run: ct lint --target-branch ${{ github.event.repository.default_branch }} unittest: - needs: lint-and-version-check + #needs: lint-and-version-check #Add after first chart release runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 From 96ec8867d7ffa05f4f1028b8fb86d61134e74690 Mon Sep 17 00:00:00 2001 From: nadavbuc Date: Mon, 8 Apr 2024 14:01:25 +0300 Subject: [PATCH 11/26] fixes for linter --- .github/workflows/tests.yml | 2 +- charts/karpenter_nodes/values.yaml | 114 ++++++++++++++--------------- 2 files changed, 58 insertions(+), 58 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 905c634..9572379 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -36,7 +36,7 @@ jobs: if: steps.list-changed.outputs.changed == 'true' run: ct lint --target-branch ${{ 
github.event.repository.default_branch }} unittest: - #needs: lint-and-version-check #Add after first chart release + needs: lint-and-version-check #Add after first chart release runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 diff --git a/charts/karpenter_nodes/values.yaml b/charts/karpenter_nodes/values.yaml index e2703b9..e0e365d 100644 --- a/charts/karpenter_nodes/values.yaml +++ b/charts/karpenter_nodes/values.yaml @@ -1,35 +1,35 @@ -##Global Configuration + ## Global Configuration -#Karpenter API Version in CRD + #Karpenter API Version in CRD ApiVersion: v1beta1 -#Nodes Configuration + #Nodes Configuration clusterName: "eks-cluster" #My Cluster Name -amiFamily: AL2 #Bottlerocket #AL2023, Can be overridden by amiSelectorTerms +amiFamily: AL2 #Bottlerocket #AL2023, Can be overridden by amiSelectorTerms IamRole: eks_nodes_role subnetSelectorTerms: [] -# - tags: -# cluster: eks-cluster -# karpenter.sh/discovery/eks-cluster: '*' -# - id: subnet-id -## Reusing same Tag Name -# - tags: -# Name: "eks-subnet-1" -# - tags: -# Name: "eks-subnet-2" + # - tags: + # cluster: eks-cluster + # karpenter.sh/discovery/eks-cluster: '*' + # - id: subnet-id + ## Reusing same Tag Name + # - tags: + # Name: "eks-subnet-1" + # - tags: + # Name: "eks-subnet-2" securityGroupSelectorTerms: [] -# - tags: -# cluster: eks-cluster -# karpenter.sh/discovery/eks-cluster: '*' -# - name: my-security-group -# - id: sg-063d7acfb4b06c82c -## Reusing same Tag Name -# - tags: -# Name: "my-security-group-1" -# - tags: -# Name: "my-security-group-2" + # - tags: + # cluster: eks-cluster + # karpenter.sh/discovery/eks-cluster: '*' + # - name: my-security-group + # - id: sg-063d7acfb4b06c82c + ## Reusing same Tag Name + # - tags: + # Name: "my-security-group-1" + # - tags: + # Name: "my-security-group-2" nodeGroupLabelName: nodegroup @@ -39,13 +39,13 @@ nodeTags: availabilityZones: [] -#MetaData Options + #MetaData Options httpEndpoint: enabled httpProtocolIPv6: disabled 
httpPutResponseHopLimit: 2 httpTokens: required -#Storage + #Storage blockDeviceMappings: - deviceName: /dev/xvda ebs: @@ -56,12 +56,12 @@ blockDeviceMappings: iops: 3000 throughput: 125 -#Consolidation Options + #Consolidation Options expireAfter: "720h" consolidationPolicy: "WhenUnderutilized" consolidateAfter: "5m" -#Default Instance Sizing + #Default Instance Sizing instances: minGeneration: 5 architecture: "amd64" @@ -79,27 +79,27 @@ instances: operatingSystems: - linux -##Exclude weak types of AMD instances -#excludeFamilies: -# - c6a -# - m6a -# - r6a -# - c5a -# - m5a -# - r5a -# - c6ad -# - m6ad -# - r6ad -# - m5ad -# - r5ad -# - r5ad - -##Exclude Expensive types like Metal -#excludeInstanceSize: -# - metal - -#Kubelet Configuration paramemters -#kubeletClusterDNS: [] + ##Exclude weak types of AMD instances + #excludeFamilies: + # - c6a + # - m6a + # - r6a + # - c5a + # - m5a + # - r5a + # - c6ad + # - m6ad + # - r6ad + # - m5ad + # - r5ad + # - r5ad + + ##Exclude Expensive types like Metal + #excludeInstanceSize: + # - metal + + #Kubelet Configuration paramemters + #kubeletClusterDNS: [] kubeletSystemReservedCpu: 250m kubeletSystemReservedMemory: 200Mi kubeletSystemReservedEphemeralStorage: 2Gi @@ -121,15 +121,15 @@ kubeletEvictionSoftGracePeriodMemoryAvailable: 5m0s kubeletEvictionSoftGracePeriodNodefsAvailable: 10m0s kubeletEvictionSoftGracePeriodNodefsInodesFree: 10m0s kubeletEvictionSoftGracePeriodPidAvailable: 2m0s -#kubeletImageGCHighThresholdPercent: 85 -#kubeletImageGCLowThresholdPercent: 80 -#kubeletImageMinimumGCAge: 2m0s -#kubeletCpuCFSQuota: true -#kubeletPodsPerCore: 5 -#kubeletMaxPods: 110 - -## Create Low Priority Class For Generating Headroom + #kubeletImageGCHighThresholdPercent: 85 + #kubeletImageGCLowThresholdPercent: 80 + #kubeletImageMinimumGCAge: 2m0s + #kubeletCpuCFSQuota: true + #kubeletPodsPerCore: 5 + #kubeletMaxPods: 110 + + ## Create Low Priority Class For Generating Headroom headRoom: true -#PlaceHolder fo NodeGroups + 
#PlaceHolder fo NodeGroups nodeGroups: {} From 0dc22a3db82e46375cdc94b5bed2ff67ea5bc5ae Mon Sep 17 00:00:00 2001 From: nadavbuc Date: Mon, 8 Apr 2024 14:03:07 +0300 Subject: [PATCH 12/26] more linter fixes --- charts/karpenter_nodes/values.yaml | 42 +++++++++++++++--------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/charts/karpenter_nodes/values.yaml b/charts/karpenter_nodes/values.yaml index e0e365d..441de5f 100644 --- a/charts/karpenter_nodes/values.yaml +++ b/charts/karpenter_nodes/values.yaml @@ -1,11 +1,11 @@ ## Global Configuration - #Karpenter API Version in CRD + # Karpenter API Version in CRD ApiVersion: v1beta1 - #Nodes Configuration -clusterName: "eks-cluster" #My Cluster Name -amiFamily: AL2 #Bottlerocket #AL2023, Can be overridden by amiSelectorTerms + # Nodes Configuration +clusterName: "eks-cluster" # My Cluster Name +amiFamily: AL2 # Bottlerocket #AL2023, Can be overridden by amiSelectorTerms IamRole: eks_nodes_role subnetSelectorTerms: [] @@ -39,13 +39,13 @@ nodeTags: availabilityZones: [] - #MetaData Options + # MetaData Options httpEndpoint: enabled httpProtocolIPv6: disabled httpPutResponseHopLimit: 2 httpTokens: required - #Storage + # Storage blockDeviceMappings: - deviceName: /dev/xvda ebs: @@ -56,12 +56,12 @@ blockDeviceMappings: iops: 3000 throughput: 125 - #Consolidation Options + # Consolidation Options expireAfter: "720h" consolidationPolicy: "WhenUnderutilized" consolidateAfter: "5m" - #Default Instance Sizing + # Default Instance Sizing instances: minGeneration: 5 architecture: "amd64" @@ -79,8 +79,8 @@ instances: operatingSystems: - linux - ##Exclude weak types of AMD instances - #excludeFamilies: + ##E xclude weak types of AMD instances + # excludeFamilies: # - c6a # - m6a # - r6a @@ -94,12 +94,12 @@ instances: # - r5ad # - r5ad - ##Exclude Expensive types like Metal - #excludeInstanceSize: + ## Exclude Expensive types like Metal + # excludeInstanceSize: # - metal - #Kubelet Configuration paramemters - 
#kubeletClusterDNS: [] + # Kubelet Configuration paramemters + # kubeletClusterDNS: [] kubeletSystemReservedCpu: 250m kubeletSystemReservedMemory: 200Mi kubeletSystemReservedEphemeralStorage: 2Gi @@ -121,15 +121,15 @@ kubeletEvictionSoftGracePeriodMemoryAvailable: 5m0s kubeletEvictionSoftGracePeriodNodefsAvailable: 10m0s kubeletEvictionSoftGracePeriodNodefsInodesFree: 10m0s kubeletEvictionSoftGracePeriodPidAvailable: 2m0s - #kubeletImageGCHighThresholdPercent: 85 - #kubeletImageGCLowThresholdPercent: 80 - #kubeletImageMinimumGCAge: 2m0s - #kubeletCpuCFSQuota: true - #kubeletPodsPerCore: 5 - #kubeletMaxPods: 110 + # kubeletImageGCHighThresholdPercent: 85 + # kubeletImageGCLowThresholdPercent: 80 + # kubeletImageMinimumGCAge: 2m0s + # kubeletCpuCFSQuota: true + # kubeletPodsPerCore: 5 + # kubeletMaxPods: 110 ## Create Low Priority Class For Generating Headroom headRoom: true - #PlaceHolder fo NodeGroups + # PlaceHolder fo NodeGroups nodeGroups: {} From b40e1ad2f28b3ef6c7c9c3297b352f3a2b727625 Mon Sep 17 00:00:00 2001 From: nadavbuc Date: Mon, 8 Apr 2024 14:04:12 +0300 Subject: [PATCH 13/26] hopefully last linter fixes --- charts/karpenter_nodes/values.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/charts/karpenter_nodes/values.yaml b/charts/karpenter_nodes/values.yaml index 441de5f..28c910f 100644 --- a/charts/karpenter_nodes/values.yaml +++ b/charts/karpenter_nodes/values.yaml @@ -4,8 +4,8 @@ ApiVersion: v1beta1 # Nodes Configuration -clusterName: "eks-cluster" # My Cluster Name -amiFamily: AL2 # Bottlerocket #AL2023, Can be overridden by amiSelectorTerms +clusterName: "eks-cluster" # My Cluster Name +amiFamily: AL2 # Bottlerocket #AL2023, Can be overridden by amiSelectorTerms IamRole: eks_nodes_role subnetSelectorTerms: [] @@ -79,7 +79,7 @@ instances: operatingSystems: - linux - ##E xclude weak types of AMD instances + ## Exclude weak types of AMD instances # excludeFamilies: # - c6a # - m6a From 
32f15bef64e6f447ff0a63b5d3a8c3ea5d685ece Mon Sep 17 00:00:00 2001 From: nadavbuc Date: Mon, 8 Apr 2024 14:05:42 +0300 Subject: [PATCH 14/26] fix for lint --- charts/karpenter_nodes/Chart.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/charts/karpenter_nodes/Chart.yaml b/charts/karpenter_nodes/Chart.yaml index 4fce05e..3def39d 100644 --- a/charts/karpenter_nodes/Chart.yaml +++ b/charts/karpenter_nodes/Chart.yaml @@ -2,5 +2,6 @@ apiVersion: v2 name: karpenter_nodes description: A Helm chart for generating NodeClasses and NodePools for Karpenter type: application - +maintainers: + -name: Fiverr DevOps Team version: 0.0.1 From 9e86b99250fee3e7ac632ba3e3ebee2c17652707 Mon Sep 17 00:00:00 2001 From: nadavbuc Date: Mon, 8 Apr 2024 14:05:52 +0300 Subject: [PATCH 15/26] fix --- charts/karpenter_nodes/Chart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/karpenter_nodes/Chart.yaml b/charts/karpenter_nodes/Chart.yaml index 3def39d..1a74250 100644 --- a/charts/karpenter_nodes/Chart.yaml +++ b/charts/karpenter_nodes/Chart.yaml @@ -3,5 +3,5 @@ name: karpenter_nodes description: A Helm chart for generating NodeClasses and NodePools for Karpenter type: application maintainers: - -name: Fiverr DevOps Team + - name: Fiverr DevOps Team version: 0.0.1 From 4ef4a0e06c2b6e083a6db66db51df25c25950e94 Mon Sep 17 00:00:00 2001 From: nadavbuc Date: Mon, 8 Apr 2024 15:00:30 +0300 Subject: [PATCH 16/26] fix validations --- charts/karpenter_nodes/Chart.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/charts/karpenter_nodes/Chart.yaml b/charts/karpenter_nodes/Chart.yaml index 1a74250..147fcb6 100644 --- a/charts/karpenter_nodes/Chart.yaml +++ b/charts/karpenter_nodes/Chart.yaml @@ -1,7 +1,6 @@ apiVersion: v2 name: karpenter_nodes +version: 0.0.1 description: A Helm chart for generating NodeClasses and NodePools for Karpenter -type: application maintainers: - - name: Fiverr DevOps Team -version: 0.0.1 + - name: 
nadavbuc From 0fb99efc28501342d574737cd1d397e28f98fd93 Mon Sep 17 00:00:00 2001 From: nadavbuc Date: Mon, 8 Apr 2024 16:00:57 +0300 Subject: [PATCH 17/26] CheckPoint Readmes and small fixes --- README.md | 26 ++++- charts/karpenter_nodes/README.md | 97 +++++++++++++++++++ .../karpenter_nodes/templates/nodeclass.yaml | 8 +- .../karpenter_nodes/templates/nodepool.yaml | 2 +- charts/karpenter_nodes/values.yaml | 10 +- 5 files changed, 132 insertions(+), 11 deletions(-) create mode 100644 charts/karpenter_nodes/README.md diff --git a/README.md b/README.md index edd6cb8..211a11e 100644 --- a/README.md +++ b/README.md @@ -1 +1,25 @@ -# public_charts \ No newline at end of file +# Fiverr Community Helm Charts + +The code is provided as-is with no warranties. + +## Usage + +[Helm](https://helm.sh) must be installed to use the charts. +Please refer to Helm's [documentation](https://helm.sh/docs/) to get started. + +Once Helm is set up properly, add the repository as follows: + +```console +helm repo add +``` + +You can then run `helm search repo ` to see the charts. + +## License + + +[Apache 2.0 License](https://github.com/fiverr/public_charts/blob/main/LICENSE). + +## Helm charts build status + +TODO diff --git a/charts/karpenter_nodes/README.md b/charts/karpenter_nodes/README.md new file mode 100644 index 0000000..df2f68f --- /dev/null +++ b/charts/karpenter_nodes/README.md @@ -0,0 +1,97 @@ +## Fiverr Public Helm Templates - Karpenter Nodes + +## Table of contents +- [Table of contents](#table-of-contents) +- [Introduction](#introduction) +- [Working with Helm](#working-with-helm) + - [Testing Your Changes](#testing-your-changes) +- [Configuration keys](#configuration-keys) + +## Introduction + +This Helm Template is designed to generate NodeClasses and NodePools using [Karpenter](https://karpenter.sh/) in addition to optional HeadRoom. + +The template follows a naming convention which is comprised of the `nodegroup` name and its architecture (amd64, arm64 or multiarch). 
+ +For example `nodes-default-amd64` + +### UserData +The `UserData` field supports templating and your own values. You can take a look at the `userdata_example_values.yaml` file for an example. + +## Working with Helm + +### Todo - add helm install command when repo is public and alive with real url + +### Testing Your Changes +After making changes you will probably want to see the new output. Run `helm template` with the relevant example files:
+`helm template . -f values.yaml` + +### Unit Tests +Make sure you have `helm-unittest` plugin installed. [helm-unittest](https://github.com/helm-unittest/helm-unittest) +Unit tests are written in `tests` directory. To run the tests, use the following command:
+`helm unittest --helm3 karpenter_nodes -f "tests/$value/*_test.yaml"` + + +## Configuration keys +Note - Most of the values can be overridden per nodegroup + +| Key Name | Description | Type | Optional? | Optional Per NodeGroup? | +| ------------------------------ | ----------- | ---- | --------- | ----------------------- | +| `ApiVersion` | ApiVersion used in Karpenter's CRD | `String` | × | × | +| `IamRole` | The IAM Role which will be attached to the instance
via instance-profile (not required if `IamInstanceProfile` is specified) | `String` | x | ✓ | +| `IamInstanceProfile` | Existing instance profile To set on the instances
(not required if `IamRole` is specified)| `String` | x | ✓ | +| `amiFamily` | AMIFamily to use (Default to AL2) [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/#specamifamily) | `String` | x | ✓ | +| `amiSelectorTerms` | AMI Selector Terms (This will override `amiFamily`) [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/#specamiselectorterms) | `List(Map)` | x | ✓ | +| `subnetSelectorTerms` | Selector for Subnets | `List(Map)` [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/#specsubnetselectorterms) | x | ✓ | +| `securityGroupSelectorTerms` | Selector for Security Groups | `List(Map)` [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/#specsecuritygroupselectorterms) | x | ✓ | +| `nodeGroupLabelName` | The Name of the label for each nodegroup (default is `nodegroup`) | `String` | x | ✓ | +| `nodeTags` | Tags to add to the instances ``: `` | `Map` | ✓ | ✓ | +| `additionalNodeTags` | Additional Tags to add to the instances ``: `` | `Map` | ✓ | ✓ | +| `nodegroups.{}` | each will be used to setup a provisioner and template based on the nodegrup name key | `List[Maps]` | x | ✓ | +| `blockDeviceMappings` | Block Device Mappings [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/#specblockdevicemappings) | `List(Map)` | x | ✓ | +| `instanceStorePolicy` | Instance Store Policy [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/#specinstancestorepolicy) | `String` | ✓ | ✓ | +| `metaDataHttpEndpoint` | Metadata HTTP Endpoint [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/#specmetadataoptions) | `String` | x | ✓ | +| `metaDataHttpProtocolIPv6` | Metadata HTTP Protocol IPv6 [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/#specmetadataoptions) | `String` | x | ✓ | +| `metaDataHttpPutResponseHopLimit` | Metadata HTTP Put Response Hop Limit [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/#specmetadataoptions) | `String` | x | ✓ | +| `metaDataHttpTokens` | 
Metadata HTTP Tokens [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/#specmetadataoptions) | `String` | x | ✓ | +| `userData` | User Data (supports templating and your own values) | `MultilineString` | ✓ | ✓ | +| `instances` | Instance provisioners configurations for node types, families and sizing - extended below | `Map` | x | ✓ | +| `instances.minGeneration` | The minimum instance generation to use (for example 4 = c4,c5,c6 etc) | `Integer` | x | ✓ | +| `instances.architecture` | `amd64`, `arm64` or `multiarch` for nodegroups which can have combined architectures | `String` | x | ✓ | +| `instances.categories` | Allowed instance categories (c, m, r) | `List` | x | ✓ | +| `instances.cores` | Allowed instance with X cores (4, 8) | `List` | x | ✓ | +| `instances.capacityType` | `spot`, `on-demand` (can use both on single provisioner) | `List` | x | ✓ | +| `nodegroups.{}.labels` | Labels to add to nodes ``: `` | `Map` | ✓ | ✓ | +| `nodegroups.{}.annotations` | Annotations to add to nodes ``: `` | `Map` | ✓ | ✓ | +| `nodegroups.{}.nodeClassRef` | If you wish to use your own nodeClass, specify it [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/) | `Map` | ✓ | ✓ | +| `nodegroups.{}.taints` | Taints to add to nodes `- `: ``: `` | `List(Map)` | ✓ | ✓ | +| `nodegroups.{}.startupTaints` | startupTaints to add to nodes `- `: ``: `` | `List(Map)` | ✓ | ✓ | +| `nodegroups.{}.instances.*` | Explicitly specify instances override | `Map` | ✓ | ✓ | +| `nodegroups.{}.limits` | Specify Limits [Documentation](https://karpenter.sh/docs/concepts/nodepools/#speclimits) | `Map` | ✓ | ✓ | +| `nodegroups.{}.nodeGroupLabel` | Override default generated nodegroup label | `String` | ✓ | ✓ | +| `nodegroups.{}.capacitySpread` | Set range of capacity spread keys (`integers`), set int for `start` and `end` | `Map` | ✓ | ✓ | +| `nodegroups.{}.*` | Over-write all above which supports it | `Map` | ✓ | ✓ | +| `nodegroups.{}.weight` | Specify NodeGroup Weight (default is 
`1`) | `Integer` | ✓ | ✓ | +| `nodegroups.{}.excludeFamilies`| Exclude specific instance families | `List` | ✓ | ✓ | +| `nodegroups.{}.budgets` | Specify Disruption Budgets [Documentation](https://karpenter.sh/docs/concepts/disruption/#nodes) | `List` | ✓ | ✓ | + +### Headroom Configuration +| Key Name | Description | Type | Optional? | Optional Per NodeGroup? | +| ------------------------------ | ----------- | ---- | --------- | ----------------------- | +| `nodegroups.{}.nodeHeadRooms` | Specify Amount of nodes which will have reserved headroom | `String` | ✓ | ✓ | +| `nodegroups.{}.nodeHeadRooms.size` | `small`, `medium`, `large`, `xlarge` - see below | `String` | ✓ | ✓ | +| `nodegroups.{}.nodeHeadRooms.count` | Number of headroom pods to schedule | `Integer` | ✓ | ✓ | +| `nodegroups.{}.dedicatedNodeHeadRooms` | Specify Amount of empty nodes ready as headroom | `String` | ✓ | ✓ | +| `nodegroups.{}.dedicatedNodeHeadRooms.size` | `small`, `medium`, `large`, `xlarge` - see below | `String` | ✓ | ✓ | +| `nodegroups.{}.dedicatedNodeHeadRooms.count` | Number of headroom nodes to schedule | `Integer` | ✓ | ✓ | + + + +## Headroom Sizing + +| Size | CPU | Ram | +| ----- | --- | --- | +| `small` | 1 | 4Gi | +| `medium` | 2 | 8Gi | +| `large` | 4 | 16Gi | +| `xlarge` | 8 | 32Gi | diff --git a/charts/karpenter_nodes/templates/nodeclass.yaml b/charts/karpenter_nodes/templates/nodeclass.yaml index dcf626a..ff9fd11 100644 --- a/charts/karpenter_nodes/templates/nodeclass.yaml +++ b/charts/karpenter_nodes/templates/nodeclass.yaml @@ -59,10 +59,10 @@ spec: instanceStorePolicy: {{ $.Values.instanceStorePolicy }} {{- end }} metadataOptions: - httpEndpoint: {{ $v.httpEndpoint | default $.Values.httpEndpoint }} - httpProtocolIPv6: {{ $v.httpProtocolIPv6 | default $.Values.httpProtocolIPv6 }} - httpPutResponseHopLimit: {{ $v.httpPutResponseHopLimit | default $.Values.httpPutResponseHopLimit }} - httpTokens: {{ $v.httpTokens | default $.Values.httpTokens }} + httpEndpoint: {{ 
$v.metaDataHttpEndpoint | default $.Values.metaDataHttpEndpoint }} + httpProtocolIPv6: {{ $v.metaDataHttpProtocolIPv6 | default $.Values.metaDataHttpProtocolIPv6 }} + httpPutResponseHopLimit: {{ $v.metaDataHttpPutResponseHopLimit | default $.Values.metaDataHttpPutResponseHopLimit }} + httpTokens: {{ $v.metaDataHttpTokens | default $.Values.metaDataHttpTokens }} {{- if or (hasKey $v "userData") (hasKey $.Values "userData") }} userData: | {{- if hasKey $v "userData" }} diff --git a/charts/karpenter_nodes/templates/nodepool.yaml b/charts/karpenter_nodes/templates/nodepool.yaml index 476dbf8..23ad2a4 100644 --- a/charts/karpenter_nodes/templates/nodepool.yaml +++ b/charts/karpenter_nodes/templates/nodepool.yaml @@ -217,5 +217,5 @@ spec: {{ $limitName }}: {{ $limitValue }} {{- end }} {{- end }} - weight: {{ $v.weight | default 1 }} + weight: {{ $v.weight | default $.Values.weight }} {{- end }} diff --git a/charts/karpenter_nodes/values.yaml b/charts/karpenter_nodes/values.yaml index 28c910f..8933ba0 100644 --- a/charts/karpenter_nodes/values.yaml +++ b/charts/karpenter_nodes/values.yaml @@ -40,10 +40,10 @@ nodeTags: availabilityZones: [] # MetaData Options -httpEndpoint: enabled -httpProtocolIPv6: disabled -httpPutResponseHopLimit: 2 -httpTokens: required +metaDataHttpEndpoint: enabled +metaDataHttpProtocolIPv6: disabled +metaDataHttpPutResponseHopLimit: 2 +metaDataHttpTokens: required # Storage blockDeviceMappings: @@ -127,7 +127,7 @@ kubeletEvictionSoftGracePeriodPidAvailable: 2m0s # kubeletCpuCFSQuota: true # kubeletPodsPerCore: 5 # kubeletMaxPods: 110 - +weight: 1 ## Create Low Priority Class For Generating Headroom headRoom: true From 1c2f62b2f7fc80aa401957cda8ccb4a6beea917f Mon Sep 17 00:00:00 2001 From: nadavbuc Date: Mon, 8 Apr 2024 16:41:38 +0300 Subject: [PATCH 18/26] Finalize Documentation --- README.md | 2 +- charts/karpenter_nodes/README.md | 41 +++++++++++++++++++------------- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/README.md 
b/README.md index 211a11e..91b8e81 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Fiverr Community Helm Charts +# [WIP] Fiverr Community Helm Charts [WIP] The code is provided as-is with no warranties. diff --git a/charts/karpenter_nodes/README.md b/charts/karpenter_nodes/README.md index df2f68f..0e7d849 100644 --- a/charts/karpenter_nodes/README.md +++ b/charts/karpenter_nodes/README.md @@ -33,7 +33,8 @@ Unit tests are written in `tests` directory. To run the tests, use the following ## Configuration keys -Note - Most of the values can be overridden per nodegroup +Note - Most of the values can be overridden per nodegroup (If not specified, it will use the default (Global) values) + | Key Name | Description | Type | Optional? | Optional Per NodeGroup? | | ------------------------------ | ----------- | ---- | --------- | ----------------------- | @@ -55,37 +56,43 @@ Note - Most of the values can be overridden per nodegroup | `metaDataHttpPutResponseHopLimit` | Metadata HTTP Put Response Hop Limit [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/#specmetadataoptions) | `String` | x | ✓ | | `metaDataHttpTokens` | Metadata HTTP Tokens [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/#specmetadataoptions) | `String` | x | ✓ | | `userData` | User Data (supports templating and your own values) | `MultilineString` | ✓ | ✓ | -| `instances` | Instance provisioners configurations for node types, families and sizing - extended below | `Map` | x | ✓ | +| `instances` | Instance configurations for node types, families and sizing - see below | `Map` | x | ✓ | | `instances.minGeneration` | The minimum instance generation to use (for example 4 = c4,c5,c6 etc) | `Integer` | x | ✓ | | `instances.architecture` | `amd64`, `arm64` or `multiarch` for nodegroups which can have combined architectures | `String` | x | ✓ | -| `instances.categories` | Allowed instance categories (c, m, r) | `List` | x | ✓ | -| `instances.cores` | Allowed instance with X 
cores (4, 8) | `List` | x | ✓ | -| `instances.capacityType` | `spot`, `on-demand` (can use both on single provisioner) | `List` | x | ✓ | +| `instances.categories` | Allowed instance categories (c, m, r) | `List(String)` | x | ✓ | +| `instances.cores` | Allowed cores per instance (`"4"`, `"8"`) | `List(String(int))` | x | ✓ | +| `instances.capacityType` | `spot`, `on-demand` (can use both on single provisioner) | `List(String)` | x | ✓ | +| `instances.operatingSystems` | Allowed operating systems (`"linux"`, `"windows"`) | `List(String)` | x | ✓ | +| `availabilityZones` | Availability Zones to use | `List(String)` | x | ✓ | +| `expireAfter` | Specify how long node should be up before refreshing it [Documentation](https://karpenter.sh/docs/concepts/disruption/#automated-methods) | `String` | x | ✓ | +| `weight` | Specify NodeGroup Weight (default is `1`) | `Integer` | x | ✓ | +| `excludeFamilies` | Exclude specific instance families | `List` | x | ✓ | +| `consolidationPolicy` | Specify how to consolidate nodes [Documentation](https://karpenter.sh/docs/concepts/nodepools/) | `String` | x | ✓ | +| `consolidateAfter` | Specify how long to wait before consolidating nodes [Documentation](https://karpenter.sh/docs/concepts/nodepools/) | `String` | ✓ | ✓ | +| `excludeInstanceSize` | Exclude specific instance sizes | `List` | ✓ | ✓ | +| `headRoom` | Generate Ultra Low Priority Class for Headroom (see below) | `String` | ✓ | x | | `nodegroups.{}.labels` | Labels to add to nodes ``: `` | `Map` | ✓ | ✓ | | `nodegroups.{}.annotations` | Annotations to add to nodes ``: `` | `Map` | ✓ | ✓ | | `nodegroups.{}.nodeClassRef` | If you wish to use your own nodeClass, specify it [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/) | `Map` | ✓ | ✓ | | `nodegroups.{}.taints` | Taints to add to nodes `- `: ``: `` | `List(Map)` | ✓ | ✓ | | `nodegroups.{}.startupTaints` | startupTaints to add to nodes `- `: ``: `` | `List(Map)` | ✓ | ✓ | -| `nodegroups.{}.instances.*` | 
Explicitly specify instances override | `Map` | ✓ | ✓ | | `nodegroups.{}.limits` | Specify Limits [Documentation](https://karpenter.sh/docs/concepts/nodepools/#speclimits) | `Map` | ✓ | ✓ | -| `nodegroups.{}.nodeGroupLabel` | Override default generated nodegroup label | `String` | ✓ | ✓ | | `nodegroups.{}.capacitySpread` | Set range of capacity spread keys (`integers`), set int for `start` and `end` | `Map` | ✓ | ✓ | -| `nodegroups.{}.*` | Over-write all above which supports it | `Map` | ✓ | ✓ | -| `nodegroups.{}.weight` | Specify NodeGroup Weight (default is `1`) | `Integer` | ✓ | ✓ | | `nodegroups.{}.excludeFamilies`| Exclude specific instance families | `List` | ✓ | ✓ | | `nodegroups.{}.budgets` | Specify Disruption Budgets [Documentation](https://karpenter.sh/docs/concepts/disruption/#nodes) | `List` | ✓ | ✓ | +| `nodegroups.{}.*` | Over-write all above which supports it | `Map` | ✓ | ✓ | +| `nodegroups.{}.instances.*` | Explicitly specify instances override, if using default specify `instances: {}` | `Map` | ✓ | ✓ | ### Headroom Configuration +Headroom will create `pause` pods with requests to just keep empty nodes up and ready for scheduling. This is useful for scaling up quickly when needed. +The pods will be configured with ultra-low priority, and will be terminated and recreated on new nodes to free them up for usage if needed. | Key Name | Description | Type | Optional? | Optional Per NodeGroup?
| | ------------------------------ | ----------- | ---- | --------- | ----------------------- | -| `nodegroups.{}.nodeHeadRooms` | Specify Amount of nodes which will have reserved headroom | `String` | ✓ | ✓ | -| `nodegroups.{}.nodeHeadRooms.size` | `small`, `medium`, `large`, `xlarge` - see below | `String` | ✓ | ✓ | -| `nodegroups.{}.nodeHeadRooms.count` | Number of headroom pods to schedule | `Integer` | ✓ | ✓ | -| `nodegroups.{}.dedicatedNodeHeadRooms` | Specify Amount of empty nodes ready as headroom | `String` | ✓ | ✓ | -| `nodegroups.{}.dedicatedNodeHeadRooms.size` | `small`, `medium`, `large`, `xlarge` - see below | `String` | ✓ | ✓ | -| `nodegroups.{}.dedicatedNodeHeadRooms.count` | Number of headroom nodes to schedule | `Integer` | ✓ | ✓ | - - +| `nodegroups.{}.headRoom` | List of headroom configurations for the nodePool | `List(Map)` | ✓ | ✓ | +| `nodegroups.{}.headRoom.size` | `small`, `medium`, `large`, `xlarge` - see below | `String` | ✓ | ✓ | +| `nodegroups.{}.headRoom.count` | Number of headroom pod replicas to schedule | `Integer` | ✓ | ✓ | +| `nodegroups.{}.headRoom.antiAffinitySpec` | Required - set antiaffinity to match against all running workloads | `LabelSelectorSpec` | ✓ | ✓ | +| `nodegroups.{}.headRoom.nameSpaces` | Specify list of namespaces to match against (default `all`) | `List(String)` | ✓ | ✓ | ## Headroom Sizing From 2d83d41a296bf5e05ddc3d1f4cbc8879b338853d Mon Sep 17 00:00:00 2001 From: nadavbuc Date: Tue, 9 Apr 2024 08:16:11 +0300 Subject: [PATCH 19/26] Readme Fixes --- charts/karpenter_nodes/README.md | 55 ++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 14 deletions(-) diff --git a/charts/karpenter_nodes/README.md b/charts/karpenter_nodes/README.md index 0e7d849..185a3f7 100644 --- a/charts/karpenter_nodes/README.md +++ b/charts/karpenter_nodes/README.md @@ -1,19 +1,10 @@ ## Fiverr Public Helm Templates - Karpenter Nodes -## Table of contents -- [Table of contents](#table-of-contents) -- 
[Introduction](#introduction) -- [Working with Helm](#working-with-helm) - - [Testing Your Changes](#testing-your-changes) -- [Configuration keys](#configuration-keys) - -## Introduction - +### Introduction This Helm Template is designed to generate NodeClasses and NodePools using [Karpenter](https://karpenter.sh/) in addition to optional HeadRoom. - The template follows a naming convention which is comprised of the `nodegroup` name and its architecture (amd64, arm64 or multiarch). - For example `nodes-default-amd64` +The chart will loop over the `nodegroups` and generate the relevant NodeClasses and NodePools. ### UserData The `UserData` field supports templating and your own values. You can take a look at the `userdata_example_values.yaml` file for an example. @@ -35,7 +26,6 @@ Unit tests are written in `tests` directory. To run the tests, use the following ## Configuration keys Note - Most of the values can be overridden per nodegroup (If not specified, it will use the default (Global) values) - | Key Name | Description | Type | Optional? | Optional Per NodeGroup? | | ------------------------------ | ----------- | ---- | --------- | ----------------------- | | `ApiVersion` | ApiVersion used in Karpenter's CRD | `String` | × | × | @@ -71,6 +61,9 @@ Note - Most of the values can be overridden per nodegroup (If not specified, it | `consolidateAfter` | Specify how long to wait before consolidating nodes [Documentation](https://karpenter.sh/docs/concepts/nodepools/) | `String` | ✓ | ✓ | | `excludeInstanceSize` | Exclude specific instance sizes | `List` | ✓ | ✓ | | `headRoom` | Generate Ultra Low Priority Class for Headroom (see below) | `String` | ✓ | x | + +### NodeGroup Configuration +| Key Name | Description | Type | Optional? | Optional Per NodeGroup? 
| | `nodegroups.{}.labels` | Labels to add to nodes ``: `` | `Map` | ✓ | ✓ | | `nodegroups.{}.annotations` | Annotations to add to nodes ``: `` | `Map` | ✓ | ✓ | | `nodegroups.{}.nodeClassRef` | If you wish to use your own nodeClass, specify it [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/) | `Map` | ✓ | ✓ | @@ -81,7 +74,7 @@ Note - Most of the values can be overridden per nodegroup (If not specified, it | `nodegroups.{}.excludeFamilies`| Exclude specific instance families | `List` | ✓ | ✓ | | `nodegroups.{}.budgets` | Specify Disruption Budgets [Documentation](https://karpenter.sh/docs/concepts/disruption/#nodes) | `List` | ✓ | ✓ | | `nodegroups.{}.*` | Over-write all above which supports it | `Map` | ✓ | ✓ | -| `nodegroups.{}.instances.*` | Explicitly specify instances override, if using default specify `instances: {}` | `Map` | ✓ | ✓ | +| `nodegroups.{}.instances.*` | Explicitly specify instances override, if using defaults specify `instances: {}` | `Map` | ✓ | ✓ | ### Headroom Configuration Headroom will create `pause` pods with requests to just keep empty nodes up and ready for scheduling. This is useful for scaling up quickly when needed. @@ -94,7 +87,7 @@ The pods will be configured with ultra-low priority, and will be terminated and | `nodegroups.{}.headRoom.antiAffinitySpec` | Required - set antiaffinity to match against all running workloads | `LabelSelectorSpec` | ✓ | ✓ | | `nodegroups.{}.headRoom.nameSpaces` | Specify list of namespaces to match against (default `all`) | `List(String)` | ✓ | ✓ | -## Headroom Sizing +### Headroom Sizing | Size | CPU | Ram | | ----- | --- | --- | @@ -102,3 +95,37 @@ The pods will be configured with ultra-low priority, and will be terminated and | `medium` | 2 | 8Gi | | `large` | 4 | 16Gi | | `xlarge` | 8 | 32Gi | + +### Kubelet Configuration +[Documentation](https://karpenter.sh/docs/concepts/nodepools/#spectemplatespeckubelet) +Kubelet configuration can be set globally or per nodegroup.
The following keys are supported: +| Key Name | Description | Type | Optional? | Optional Per NodeGroup? | +| ------------------------------ | ----------- | ---- | --------- | ----------------------- | +| `kubeletClusterDNS` | Cluster DNS | `List` | ✓ | ✓ | +| `kubeletSystemReservedCpu` | System Reserved CPU | `String` | x | ✓ | +| `kubeletSystemReservedMemory` | System Reserved Memory | `String` | x | ✓ | +| `kubeletSystemReservedEphemeralStorage` | System Reserved Ephemeral Storage | `String` | x | ✓ | +| `kubeletKubeReservedCpu` | Kube Reserved CPU | `String` | x | ✓ | +| `kubeletKubeReservedMemory` | Kube Reserved Memory | `String` | x | ✓ | +| `kubeletKubeReservedEphemeralStorage` | Kube Reserved Ephemeral Storage | `String` | x | ✓ | +| `kubeletEvictionHardMemoryAvailable` | Eviction Hard Memory Available | `String` | x | ✓ | +| `kubeletEvictionHardNodefsAvailable` | Eviction Hard Nodefs Available | `String` | x | ✓ | +| `kubeletEvictionHardNodefsInodesFree` | Eviction Hard Nodefs Inodes Free | `String` | x | ✓ | +| `kubeletEvictionSoftMemoryAvailable` | Eviction Soft Memory Available | `String` | x | ✓ | +| `kubeletEvictionSoftNodefsAvailable` | Eviction Soft Nodefs Available | `String` | x | ✓ | +| `kubeletEvictionSoftNodefsInodesFree` | Eviction Soft Nodefs Inodes Free | `String` | x | ✓ | +| `kubeletEvictionSoftImagefsAvailable` | Eviction Soft Imagefs Available | `String` | x | ✓ | +| `kubeletEvictionSoftImagefsInodesFree` | Eviction Soft Imagefs Inodes Free | `String` | x | ✓ | +| `kubeletEvictionSoftPidAvailable` | Eviction Soft Pid Available | `String` | x | ✓ | +| `kubeletEvictionSoftGracePeriodImagefsAvailable` | Eviction Soft Grace Period Imagefs Available | `String` | x | ✓ | +| `kubeletEvictionSoftGracePeriodImagefsInodesFree` | Eviction Soft Grace Period Imagefs Inodes Free | `String` | x | ✓ | +| `kubeletEvictionSoftGracePeriodMemoryAvailable` | Eviction Soft Grace Period Memory Available | `String` | x | ✓ | +| 
`kubeletEvictionSoftGracePeriodNodefsAvailable` | Eviction Soft Grace Period Nodefs Available | `String` | x | ✓ | +| `kubeletEvictionSoftGracePeriodNodefsInodesFree` | Eviction Soft Grace Period Nodefs Inodes Free | `String` | x | ✓ | +| `kubeletEvictionSoftGracePeriodPidAvailable` | Eviction Soft Grace Period Pid Available | `String` | x | ✓ | +| `kubeletImageGCHighThresholdPercent` | Image GC High Threshold Percent | `String` | ✓ | ✓ | +| `kubeletImageGCLowThresholdPercent` | Image GC Low Threshold Percent | `String` | ✓ | ✓ | +| `kubeletImageMinimumGCAge` | Image Minimum GC Age | `String` | ✓ | ✓ | +| `kubeletCpuCFSQuota` | CPU CFS Quota | `String` | ✓ | ✓ | +| `kubeletPodsPerCore` | Pods Per Core | `String` | ✓ | ✓ | +| `kubeletMaxPods` | Max Pods | `String` | ✓ | ✓ | From b20eb7cdfe896ae6cf091370f133177fd9fc10a7 Mon Sep 17 00:00:00 2001 From: nadavbuc Date: Tue, 9 Apr 2024 08:52:21 +0300 Subject: [PATCH 20/26] add grafana dashboards --- charts/karpenter_nodes/README.md | 3 + .../grafana/Karpenter-OverView.json | 1935 ++++++++++++++ .../grafana/Karpenter-Per-NodeGroup.json | 2285 +++++++++++++++++ 3 files changed, 4223 insertions(+) create mode 100644 charts/karpenter_nodes/grafana/Karpenter-OverView.json create mode 100644 charts/karpenter_nodes/grafana/Karpenter-Per-NodeGroup.json diff --git a/charts/karpenter_nodes/README.md b/charts/karpenter_nodes/README.md index 185a3f7..fadd517 100644 --- a/charts/karpenter_nodes/README.md +++ b/charts/karpenter_nodes/README.md @@ -129,3 +129,6 @@ Kubelet configuration can be set globally or per nodegroup. The following keys a | `kubeletCpuCFSQuota` | CPU CFS Quota | `String` | ✓ | ✓ | | `kubeletPodsPerCore` | Pods Per Core | `String` | ✓ | ✓ | | `kubeletMaxPods` | Max Pods | `String` | ✓ | ✓ | + +## Extras +See grafana directory for dashboards available for you to import into your Grafana instance.
diff --git a/charts/karpenter_nodes/grafana/Karpenter-OverView.json b/charts/karpenter_nodes/grafana/Karpenter-OverView.json new file mode 100644 index 0000000..0e372b4 --- /dev/null +++ b/charts/karpenter_nodes/grafana/Karpenter-OverView.json @@ -0,0 +1,1935 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": [], + "__requires": [ + { + "type": "panel", + "id": "bargauge", + "name": "Bar gauge", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "8.4.4" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "iteration": 1712640887031, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "super-light-purple", + "mode": "fixed" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 40, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": 
"center", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": { + "titleSize": 40, + "valueSize": 40 + }, + "textMode": "value_and_name" + }, + "pluginVersion": "8.4.4", + "repeat": "CLUSTER", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "sum(karpenter_nodeclaims_terminated{reason=\"interruption\",cluster=~\"$CLUSTER\"}) by (nodepool) - (sum(karpenter_nodeclaims_terminated{reason=\"interruption\",cluster=~\"$CLUSTER\"}) by (nodepool) offset $timediff)", + "instant": true, + "interval": "", + "legendFormat": "{{nodepool}}", + "refId": "A" + } + ], + "title": "$CLUSTER Total Interruptions - during $timediff", + "transparent": true, + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 6 + }, + "id": 38, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": 
"none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(rate(container_cpu_usage_seconds_total{pod=~\"karpenter.*\"})) by (cluster, pod)", + "interval": "", + "legendFormat": "[{{cluster}}]{{pod}}", + "refId": "A" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 6 + }, + "id": 39, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(container_memory_usage_bytes{pod=~\"karpenter.*\"}) by (cluster, pod)", + "interval": "", + "legendFormat": "[{{cluster}}]{{pod}}", + "refId": "A" + } + ], + "title": "Memory Usage", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 22, + "panels": [], + 
"title": "Cluster Capacity", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 35, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "repeat": "CLUSTER", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "count(kube_node_labels{cluster=~\"$CLUSTER\",label_karpenter_sh_capacity_type!=\"\"}) by (label_karpenter_sh_nodepool, label_karpenter_sh_capacity_type)", + "interval": "", + "legendFormat": "[{{label_karpenter_sh_capacity_type}}]{{label_karpenter_sh_nodepool}} ", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "count(kube_node_labels{cluster=~\"$CLUSTER\",label_karpenter_sh_capacity_type!=\"\"}) by ( label_karpenter_sh_capacity_type)", + "hide": false, + "interval": "", + "legendFormat": "TOTAL [{{label_karpenter_sh_capacity_type}}]", 
+ "refId": "B" + } + ], + "title": "Spot/OD by Provisioners - $CLUSTER", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 30, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(karpenter_nodepool_usage) by (cluster, nodepool, resource_type) / sum(karpenter_nodepool_limit) by (cluster, nodepool, resource_type)", + "interval": "", + "legendFormat": "[{{cluster}}] {{nodepool}} {{resource_type}}", + "range": true, + "refId": "A" + } + ], + "title": "NodeGroup Usage Out Of limit", + "type": "timeseries" + }, + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 13, + "title": "Provisioning", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + 
"mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "opm" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 32 + }, + "id": 19, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "min", + "max", + "sum" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(karpenter_nodeclaims_created[1m])) by (cluster, nodepool, reason)", + "interval": "", + "legendFormat": "[A][{{cluster}} {{nodepool}}]{{reason}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "- sum(increase(karpenter_nodeclaims_terminated[1m])) by (cluster, reason, nodepool)", + "hide": false, + "interval": "", + "legendFormat": "[R][{{cluster}} {{nodepool}}]{{reason}}", + "range": true, + "refId": "B" + } + ], + "title": "Created/Removed Nodes", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, 
+ "custom": { + "axisLabel": "", + "axisPlacement": "left", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 32 + }, + "id": 20, + "maxDataPoints": 9999999999, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "min", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Total", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(idelta(karpenter_nodeclaims_terminated{reason=\"interruption\"}[1m])) by (nodepool, cluster)", + "hide": false, + "interval": "", + "legendFormat": "[{{cluster}}] {{nodepool}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "op": "gt", + "value": 10, + "visible": true + } + ], + "title": "Karpenter Spot Interruptions", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + 
"tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "opm" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 40 + }, + "id": 11, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "mean", + "sum" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(irate(karpenter_cloudprovider_duration_seconds_count{method!=\"GetInstanceTypes\"}[1m])) by (cluster, method) * 60", + "interval": "", + "legendFormat": "{{cluster}} {{method}}", + "range": true, + "refId": "A" + } + ], + "title": "Instance Provisioning Actions", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": 
"green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "opm" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 40 + }, + "id": 9, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "mean", + "max", + "sum" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(karpenter_interruption_actions_performed[1m])) by (action_type,cluster)", + "interval": "", + "legendFormat": "[{{cluster}}]{{action_type}}", + "range": true, + "refId": "A" + } + ], + "title": "Interruption actions", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 48 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + 
"datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(karpenter_deprovisioning_actions_performed[5m])) by (action)", + "interval": "", + "legendFormat": "{{action}}", + "range": true, + "refId": "A" + } + ], + "title": "Deprovisioning", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 48 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile($perc, sum(rate(karpenter_cloudprovider_duration_seconds_bucket{method!=\"GetInstanceTypes\"}[5m])) by (le, method))", + "interval": "", + "legendFormat": "{{method}}", + "range": true, + "refId": "A" + } + ], + "title": "AWS Requests", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": 
"${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "opm" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 56 + }, + "id": 8, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(irate(karpenter_interruption_received_messages[1m])) by (message_type, cluster) * 60", + "hide": false, + "interval": "", + "legendFormat": "{{cluster}}_{{message_type}}", + "range": true, + "refId": "B" + } + ], + "title": "Interruption messages", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 64 + }, + "id": 24, + "panels": [], + "title": "Workers", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + 
"hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 65 + }, + "id": 26, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(controller_runtime_active_workers{service=\"karpenter\"}) by (cluster, controller) / sum(controller_runtime_max_concurrent_reconciles{service=\"karpenter\"}) by (cluster, controller)", + "legendFormat": "[{{cluster}}] {{controller}}", + "range": true, + "refId": "A" + } + ], + "title": "Workers Utilization", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": 
"off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 65 + }, + "id": 28, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(rate(controller_runtime_reconcile_errors_total{service=\"karpenter\"}[5m])) by (cluster, controller)", + "legendFormat": "[{{cluster}}] {{controller}}", + "range": true, + "refId": "A" + } + ], + "title": "Reconcile errors rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 14, + "w": 12, + "x": 0, + "y": 73 + }, + "id": 50, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "mean" + ], + "displayMode": "table", + 
"placement": "right" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile($perc, rate(controller_runtime_reconcile_time_seconds_bucket{controller=~\"$controller\",cluster=~\"$CLUSTER\"}[10m]))", + "hide": false, + "interval": "", + "legendFormat": "[$perc {{controller}}] {{cluster}}", + "range": true, + "refId": "Minimum" + } + ], + "title": "Controller Reconciliation Latency [$controller]", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 14, + "w": 12, + "x": 12, + "y": 73 + }, + "id": 54, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(controller_runtime_reconcile_total{cluster=~\"$CLUSTER\",controller=~\"$controller\"}[10m])) by (controller,cluster)", + "interval": "", + "legendFormat": "[{{cluster}}]{{controller}}", + "range": true, + "refId": "A", + "target": "" + } + ], + "title": "Controller Reconciliation Rate", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": 
"auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 87 + }, + "id": 46, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "karpenter_nodes_termination_time_seconds{quantile=\"$quantile\"}", + "hide": false, + "interval": "", + "legendFormat": "[$quantile][{{cluster}}]{{nodepool}}", + "range": true, + "refId": "C" + } + ], + "title": "Node Termination Latency", + "type": "timeseries" + } + ], + "refresh": false, + "schemaVersion": 35, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "0.9", + "value": "0.9" + }, + "hide": 0, + "includeAll": false, + "label": "Percentile", + "multi": false, + "name": "perc", + "options": [ + { + "selected": false, + "text": "0.5", + "value": "0.5" + }, + { + "selected": false, + "text": "0.8", + "value": "0.8" + }, + { + "selected": true, + "text": "0.9", + "value": "0.9" + }, + { + "selected": false, + "text": "1", + "value": "1" + } + ], + "query": "0.5, 0.8, 0.9,1", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + }, + { + 
"current": { + "selected": false, + "text": "1d", + "value": "1d" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "timediff", + "options": [ + { + "selected": true, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "2d", + "value": "2d" + }, + { + "selected": false, + "text": "5d", + "value": "5d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + }, + { + "selected": false, + "text": "60d", + "value": "60d" + }, + { + "selected": false, + "text": "90d", + "value": "90d" + } + ], + "query": "1d,2d,5d,7d,14d,30d,60d,90d", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(cluster)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "CLUSTER", + "options": [], + "query": { + "query": "label_values(cluster)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(karpenter_nodes_termination_time_seconds,quantile)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "quantile", + "options": [], + "query": { + "query": "label_values(karpenter_nodes_termination_time_seconds,quantile)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(controller_runtime_reconcile_time_seconds_count, controller)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "controller", + "options": [], + "query": { + "query": 
"label_values(controller_runtime_reconcile_time_seconds_count, controller)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Karpenter-for-export", + "uid": "ctAxtWaIk", + "version": 12, + "weekStart": "" +} \ No newline at end of file diff --git a/charts/karpenter_nodes/grafana/Karpenter-Per-NodeGroup.json b/charts/karpenter_nodes/grafana/Karpenter-Per-NodeGroup.json new file mode 100644 index 0000000..c2ad88f --- /dev/null +++ b/charts/karpenter_nodes/grafana/Karpenter-Per-NodeGroup.json @@ -0,0 +1,2285 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": [], + "__requires": [ + { + "type": "panel", + "id": "bargauge", + "name": "Bar gauge", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "8.4.4" + }, + { + "type": "panel", + "id": "piechart", + "name": "Pie chart", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "iteration": 1712641791200, + "links": [], + "liveNow": false, + "panels": [ + { + 
"datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "super-light-purple", + "mode": "fixed" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 0, + "y": 0 + }, + "id": 32, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": { + "titleSize": 40, + "valueSize": 40 + }, + "textMode": "value_and_name" + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "sum(karpenter_nodeclaims_terminated{reason=\"interruption\",cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (nodepool)", + "instant": true, + "interval": "", + "legendFormat": "{{nodepool}}", + "refId": "A" + } + ], + "title": "Total Interruptions", + "transparent": true, + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 4, + "y": 0 + }, + "id": 36, + "options": { + "displayLabels": [ + "percent", + "name" + ], + "legend": { + "displayMode": "hidden", + "placement": "bottom" + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + 
"exemplar": false, + "expr": "count(kube_node_labels{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\", label_karpenter_sh_capacity_type!=\"\"}) by (label_karpenter_sh_capacity_type)", + "instant": true, + "interval": "", + "legendFormat": "{{label_karpenter_sh_capacity_type}}", + "refId": "A" + } + ], + "title": "LifeCycles", + "transparent": true, + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 8, + "y": 0 + }, + "id": 40, + "options": { + "displayLabels": [ + "percent", + "name" + ], + "legend": { + "displayMode": "hidden", + "placement": "bottom" + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "count(kube_node_labels{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\", label_topology_kubernetes_io_zone!=\"\"}) by (label_topology_kubernetes_io_zone)", + "instant": true, + "interval": "", + "legendFormat": "{{label_topology_kubernetes_io_zone}}", + "refId": "A" + } + ], + "title": "AZ ", + "transparent": true, + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-BlYlRd" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 3, + "x": 12, + "y": 0 + }, + "id": 39, 
+ "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(karpenter_nodepool_usage{cluster=\"$CLUSTER\",nodepool=~\"$NODEPOOL\",resource_type=\"memory\"} / 1024 /1024 / 1024) / \nsum(karpenter_nodepool_usage{cluster=\"$CLUSTER\",nodepool=~\"$NODEPOOL\",resource_type=\"cpu\"}) ", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Memory / CPU Ratio", + "transparent": true, + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "semi-dark-blue", + "value": null + }, + { + "color": "red", + "value": 1536 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 3, + "x": 15, + "y": 0 + }, + "id": 38, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(karpenter_nodepool_usage{cluster=\"$CLUSTER\",nodepool=~\"$NODEPOOL\",resource_type=\"cpu\"})", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Total cores $NODEGROUP", + "transparent": true, + "type": "stat" + }, 
+ { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-BlYlRd" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1536 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 3, + "x": 18, + "y": 0 + }, + "id": 37, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^Value$/", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(karpenter_nodepool_usage{cluster=\"$CLUSTER\",nodepool=~\"$NODEPOOL\",resource_type=\"memory\"} /1024 /1024 /1024)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Total Memory (GiB)", + "transparent": true, + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 3, + "x": 21, + "y": 0 + }, + "id": 41, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, +
"editorMode": "code", + "exemplar": true, + "expr": "sum(karpenter_nodepool_usage{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (resource_type, nodepool) / sum(karpenter_nodepool_limit{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (resource_type, nodepool)", + "instant": false, + "interval": "", + "legendFormat": "{{resource_type}} {{nodepool}}", + "range": true, + "refId": "A" + } + ], + "title": "NodeGroup Usage Out Of limit", + "transparent": true, + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 7, + "x": 0, + "y": 7 + }, + "id": 44, + "options": { + "displayLabels": [ + "percent", + "name" + ], + "legend": { + "displayMode": "table", + "placement": "right", + "values": [ + "value" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "count(kube_node_labels{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\", label_karpenter_sh_capacity_type!=\"\"}) by (label_node_kubernetes_io_instance_type)", + "instant": true, + "interval": "", + "legendFormat": "{{label_node_kubernetes_io_instance_type}}", + "refId": "A" + } + ], + "title": "Instance Types", + "transparent": true, + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-YlBl" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + 
"color": "red", + "value": 500 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 3, + "x": 7, + "y": 7 + }, + "id": 42, + "options": { + "displayMode": "gradient", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(karpenter_nodes_created{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (nodepool)", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": "Created by {{nodepool}}", + "range": true, + "refId": "A" + } + ], + "title": "Total Created Nodes", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 5, + "x": 10, + "y": 7 + }, + "id": 47, + "options": { + "displayLabels": [ + "name" + ], + "legend": { + "displayMode": "hidden", + "placement": "bottom" + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(karpenter_nodeclaims_drifted{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (type)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", + "range": true, + "refId": "B" + } + ], + "title": "Drift types", + "type": "piechart" +
}, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 9, + "x": 15, + "y": 7 + }, + "id": 43, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(karpenter_nodeclaims_terminated{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (nodepool, reason)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Removed by {{nodepool}} {{reason}}", + "range": true, + "refId": "B" + } + ], + "title": "Total Removed Nodes", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "currencyUSD" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 15 + }, + "id": 52, + "interval": "1h", + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": 
"sum_over_time(sum(count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"spot\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"spot\"}) by (instance_type))[1d])", + "instant": false, + "interval": "", + "legendFormat": "Day", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum_over_time(sum(count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"spot\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"spot\"}) by (instance_type))[7d])", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Week", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum_over_time(sum(count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"spot\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"spot\"}) by (instance_type))[30d])", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Month", + "refId": "C" + } + ], + "title": "Spot Costs on $NODEGROUP", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + 
"mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "currencyUSD" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 53, + "interval": "1h", + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum_over_time(sum(count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"on-demand\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"on-demand\"}) by (instance_type))[1d])", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": "Day", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum_over_time(sum(count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"on-demand\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"on-demand\"}) by (instance_type))[7d])", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Week", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": 
"sum_over_time(sum(count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"on-demand\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"on-demand\"}) by (instance_type))[30d])", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Month", + "refId": "C" + } + ], + "title": "On-Demand Costs on $NODEGROUP", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "currencyUSD" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 23 + }, + "id": 49, + "interval": "1h", + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "sum" + ], + "displayMode": "table", + "placement": "right", + "sortBy": "Total", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": 
"count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"spot\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"spot\"}) by (instance_type)", + "interval": "", + "legendFormat": "{{instance_type}} / Hour", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"spot\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"spot\"}) by (instance_type))", + "hide": false, + "interval": "", + "legendFormat": "Total Hourly Price", + "refId": "B" + } + ], + "title": "Spot Hourly Pricing for $NODEGROUP", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "currencyUSD" + }, + 
"overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 23 + }, + "id": 50, + "interval": "1h", + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "sum" + ], + "displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"on-demand\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"on-demand\"}) by (instance_type)", + "interval": "", + "legendFormat": "{{instance_type}} Hourly Price", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"on-demand\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"on-demand\"}) by (instance_type))", + "hide": false, + "interval": "", + "legendFormat": "Total Hourly Price", + "refId": "B" + } + ], + "title": "On-Demand Hourly Pricing for $NODEGROUP", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 22, + "panels": [], + "title": "NodeGroup Capacity", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": 
"auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 32 + }, + "id": 19, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "min", + "max", + "sum" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(karpenter_nodeclaims_created{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\" }[1m])) by (reason)", + "interval": "", + "legendFormat": "[ADD] {{reason}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "- sum(increase(karpenter_nodeclaims_terminated{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}[1m])) by (reason)", + "hide": false, + "interval": "", + "legendFormat": "[REM] {{reason}}", + "range": true, + "refId": "B" + } + ], + "title": "Created/Removed Nodes $NODEGROUP", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "left", 
+ "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "Total Counter" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "none" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 32 + }, + "id": 20, + "maxDataPoints": 9999999999, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "min", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Total", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(karpenter_nodeclaims_terminated{reason=\"interruption\", cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}[1m])) by (nodepool)", + "hide": false, + "interval": "", + "legendFormat": "{{provisioner}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(karpenter_nodeclaims_terminated{reason=\"interruption\", cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"})", + "hide": false, + "interval": "", + "legendFormat": "Total Counter", + "refId": "C" + } + ], + "thresholds": [ + { + "colorMode": 
"critical", + "op": "gt", + "value": 10, + "visible": true + } + ], + "title": "Karpenter Spot Interruptions", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 55, + "maxDataPoints": 9999999999, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(increase(karpenter_nodeclaims_drifted{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"})) by (type) > 0", + "interval": "", + "legendFormat": "{{type}}", + "refId": "A" + } + ], + "title": "Drift Tracker", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + 
"viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 47 + }, + "id": 15, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(karpenter_nodepool_usage{resource_type=\"cpu\", cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (nodepool)", + "interval": "", + "legendFormat": "{{nodepool}}", + "range": true, + "refId": "A" + } + ], + "title": "Current Provisioned Cores", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 
null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 47 + }, + "id": 16, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(karpenter_nodepool_usage{resource_type=\"memory\", cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (nodepool)", + "interval": "", + "legendFormat": "{{cluster}} {{nodepool}}", + "range": true, + "refId": "A" + } + ], + "title": "Current Provisioned Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 55 + }, + "id": 35, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + 
}, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "count(kube_node_labels{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\", label_karpenter_sh_capacity_type!=\"\"}) by (label_karpenter_sh_capacity_type, label_karpenter_sh_nodepool)", + "interval": "", + "legendFormat": "[{{label_karpenter_sh_capacity_type}}]{{label_karpenter_sh_nodepool }}", + "refId": "A" + } + ], + "title": "LifeCycle", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 55 + }, + "id": 30, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "mean" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(karpenter_nodepool_usage{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (resource_type) / sum(karpenter_nodepool_limit{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by 
(resource_type)", + "interval": "", + "legendFormat": "{{resource_type}}", + "range": true, + "refId": "A" + } + ], + "title": "NodeGroup Usage Out Of limit", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 63 + }, + "id": 45, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "count(kube_node_labels{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\", label_karpenter_sh_capacity_type!=\"\"}) by (label_node_kubernetes_io_instance_type)", + "interval": "", + "legendFormat": "{{label_node_kubernetes_io_instance_type}}", + "refId": "A" + } + ], + "title": "Instance Types", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + 
"axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 63 + }, + "id": 46, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "count(kube_node_labels{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\", label_karpenter_sh_capacity_type!=\"\"}) by (label_node_kubernetes_io_instance_type,label_karpenter_sh_capacity_type, label_topology_kubernetes_io_zone)", + "interval": "", + "legendFormat": "[{{label_topology_kubernetes_io_zone}}][{{label_karpenter_sh_capacity_type}}]{{label_node_kubernetes_io_instance_type}}", + "refId": "A" + } + ], + "title": "Instance Types, lifecycle and zone", + "type": "timeseries" + } + ], + "refresh": false, + "schemaVersion": 35, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(kube_node_labels,cluster)", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "CLUSTER", + "options": [], + "query": { + "query": 
"label_values(kube_node_labels,cluster)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(kube_node_labels{cluster=\"$CLUSTER\"},label_nodegroup)", + "hide": 0, + "includeAll": false, + "label": "Node Group", + "multi": false, + "name": "NODEGROUP", + "options": [], + "query": { + "query": "label_values(kube_node_labels{cluster=\"$CLUSTER\"},label_nodegroup)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(kube_node_labels{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\"},label_karpenter_sh_nodepool)", + "hide": 0, + "includeAll": true, + "label": "NodePool", + "multi": true, + "name": "NODEPOOL", + "options": [], + "query": { + "query": "label_values(kube_node_labels{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\"},label_karpenter_sh_nodepool)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Karpenter Per Node - Export", + "uid": "5DCs2Z-Sk", + "version": 2, + "weekStart": "" +} \ No newline at end of file From 1f0cf5725bc8b3bd0f01b40574fb3f5dd5d255a8 Mon Sep 17 00:00:00 2001 From: nadavbuc Date: Tue, 9 Apr 2024 08:55:05 +0300 Subject: [PATCH 21/26] readme fixes --- charts/karpenter_nodes/README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/charts/karpenter_nodes/README.md b/charts/karpenter_nodes/README.md index fadd517..c32fbde 100644 --- a/charts/karpenter_nodes/README.md +++ b/charts/karpenter_nodes/README.md @@ -2,8 +2,11 
@@ ### Introduction This Helm Template is designed to generate NodeClasses and NodePools using [Karpenter](https://karpenter.sh/) in addition to optional HeadRoom. + The template follows a naming convention which is comprised of the `nodegroup` name and its architecture (amd64, arm64 or multiarch). + For example `nodes-default-amd64` + The chart will loop over the `nodegroups` and generate the relevant NodeClasses and NodePools. ### UserData @@ -19,6 +22,7 @@ After making changes you will probably want to see the new output. Run `helm tem ### Unit Tests Make sure you have `helm-unittest` plugin installed. [helm-unittest](https://github.com/helm-unittest/helm-unittest) + Unit tests are written in `tests` directory. To run the tests, use the following command:
`helm unittest --helm3 karpenter_nodes -f "tests/$value/*_test.yaml"` @@ -64,6 +68,7 @@ Note - Most of the values can be overridden per nodegroup (If not specified, it ### NodeGroup Configuration | Key Name | Description | Type | Optional? | Optional Per NodeGroup? | +| ------------------------------ | ----------- | ---- | --------- | ----------------------- | | `nodegroups.{}.labels` | Labels to add to nodes ``: `` | `Map` | ✓ | ✓ | | `nodegroups.{}.annotations` | Annotations to add to nodes ``: `` | `Map` | ✓ | ✓ | | `nodegroups.{}.nodeClassRef` | If you wish to use your own nodeClass, specify it [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/) | `Map` | ✓ | ✓ | @@ -77,7 +82,7 @@ Note - Most of the values can be overridden per nodegroup (If not specified, it | `nodegroups.{}.instances.*` | Explicitly specify instances override, if using defaults specify `instances: {}` | `Map` | ✓ | ✓ | ### Headroom Configuration -Headroom will create `pause` pods with requetss to just keep empty nodes up and ready for scheduling. This is useful for scaling up quickly when needed. +Headroom will create `pause` pods with requetss to just keep empty nodes up and ready for scheduling. This is useful for scaling up quickly when needed.
The pods will be configured with ultra-low priority, and will be terminated and recreated on new nodes to free them up for usage if needed. | Key Name | Description | Type | Optional? | Optional Per NodeGroup? | | ------------------------------ | ----------- | ---- | --------- | ----------------------- | From ad6ccbd83b4da4c2a38ec297db6040ef9b5fb787 Mon Sep 17 00:00:00 2001 From: nadavbuc Date: Tue, 9 Apr 2024 08:56:23 +0300 Subject: [PATCH 22/26] f --- charts/karpenter_nodes/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/karpenter_nodes/README.md b/charts/karpenter_nodes/README.md index c32fbde..d00a07b 100644 --- a/charts/karpenter_nodes/README.md +++ b/charts/karpenter_nodes/README.md @@ -82,7 +82,7 @@ Note - Most of the values can be overridden per nodegroup (If not specified, it | `nodegroups.{}.instances.*` | Explicitly specify instances override, if using defaults specify `instances: {}` | `Map` | ✓ | ✓ | ### Headroom Configuration -Headroom will create `pause` pods with requetss to just keep empty nodes up and ready for scheduling. This is useful for scaling up quickly when needed.
+Headroom will create `pause` pods with requests to just keep empty nodes up and ready for scheduling.
This is useful for scaling up quickly when needed.
The pods will be configured with ultra-low priority, and will be terminated and recreated on new nodes to free them up for usage if needed. | Key Name | Description | Type | Optional? | Optional Per NodeGroup? | | ------------------------------ | ----------- | ---- | --------- | ----------------------- | From 0f23c40d4d20cedaa542f50ec716d54879edb846 Mon Sep 17 00:00:00 2001 From: nadavbuc Date: Tue, 9 Apr 2024 08:57:58 +0300 Subject: [PATCH 23/26] fix readme final --- charts/karpenter_nodes/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/karpenter_nodes/README.md b/charts/karpenter_nodes/README.md index d00a07b..7acc607 100644 --- a/charts/karpenter_nodes/README.md +++ b/charts/karpenter_nodes/README.md @@ -37,8 +37,8 @@ Note - Most of the values can be overridden per nodegroup (If not specified, it | `IamInstanceProfile` | Existing instance profile To set on the instances
(not required if `IamRole` is specified)| `String` | x | ✓ | | `amiFamily` | AMIFamily to use (Default to AL2) [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/#specamifamily) | `String` | x | ✓ | | `amiSelectorTerms` | AMI Selector Terms (This will override `amiFamily`) [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/#specamiselectorterms) | `List(Map)` | x | ✓ | -| `subnetSelectorTerms` | Selector for Subnets | `List(Map)` [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/#specsubnetselectorterms) | x | ✓ | -| `securityGroupSelectorTerms` | Selector for Security Groups | `List(Map)` [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/#specsecuritygroupselectorterms) | x | ✓ | +| `subnetSelectorTerms` | Selector for Subnets [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/#specsubnetselectorterms) | `List(Map)` | x | ✓ | +| `securityGroupSelectorTerms` | Selector for Security Groups [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/#specsecuritygroupselectorterms) | `List(Map)` | x | ✓ | | `nodeGroupLabelName` | The Name of the label for each nodegroup (default is `nodegroup`) | `String` | x | ✓ | | `nodeTags` | Tags to add to the instances ``: `` | `Map` | ✓ | ✓ | | `additionalNodeTags` | Additional Tags to add to the instances ``: `` | `Map` | ✓ | ✓ | From e71b6e7261d96298d07c2716acace17a9a8452e9 Mon Sep 17 00:00:00 2001 From: nadavbuc Date: Tue, 9 Apr 2024 09:46:35 +0300 Subject: [PATCH 24/26] support new karpenter features --- charts/karpenter_nodes/README.md | 2 ++ charts/karpenter_nodes/templates/nodeclass.yaml | 2 ++ charts/karpenter_nodes/tests/nodeclass_test.yaml | 14 +++++++++++++- charts/karpenter_nodes/tests/values.yaml | 2 ++ charts/karpenter_nodes/values.yaml | 2 ++ 5 files changed, 21 insertions(+), 1 deletion(-) diff --git a/charts/karpenter_nodes/README.md b/charts/karpenter_nodes/README.md index 7acc607..be950f9 100644 --- a/charts/karpenter_nodes/README.md +++ 
b/charts/karpenter_nodes/README.md @@ -44,6 +44,8 @@ Note - Most of the values can be overridden per nodegroup (If not specified, it | `additionalNodeTags` | Additional Tags to add to the instances ``: `` | `Map` | ✓ | ✓ | | `nodegroups.{}` | each will be used to setup a provisioner and template based on the nodegrup name key | `List[Maps]` | x | ✓ | | `blockDeviceMappings` | Block Device Mappings [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/#specblockdevicemappings) | `List(Map)` | x | ✓ | +| `detailedMonitoring` | Detailed Monitoring [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/#specdetailedmonitoring) | `Boolean` | x | ✓ | +| `associatePublicIPAddress` | Associate Public IP Address [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/#specassociatepublicipaddress) | `Boolean` | x | ✓ | | `instanceStorePolicy` | Instance Store Policy [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/#specinstancestorepolicy) | `String` | ✓ | ✓ | | `metaDataHttpEndpoint` | Metadata HTTP Endpoint [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/#specmetadataoptions) | `String` | x | ✓ | | `metaDataHttpProtocolIPv6` | Metadata HTTP Protocol IPv6 [Documentation](https://karpenter.sh/docs/concepts/nodeclasses/#specmetadataoptions) | `String` | x | ✓ | diff --git a/charts/karpenter_nodes/templates/nodeclass.yaml b/charts/karpenter_nodes/templates/nodeclass.yaml index ff9fd11..8d83d9b 100644 --- a/charts/karpenter_nodes/templates/nodeclass.yaml +++ b/charts/karpenter_nodes/templates/nodeclass.yaml @@ -58,6 +58,8 @@ spec: {{- else if hasKey $.Values "instanceStorePolicy" }} instanceStorePolicy: {{ $.Values.instanceStorePolicy }} {{- end }} + detailedMonitoring: {{ $v.detailedMonitoring | default $.Values.detailedMonitoring }} + associatePublicIPAddress: {{ $v.associatePublicIPAddress | default $.Values.associatePublicIPAddress }} metadataOptions: httpEndpoint: {{ $v.metaDataHttpEndpoint | default 
$.Values.metaDataHttpEndpoint }} httpProtocolIPv6: {{ $v.metaDataHttpProtocolIPv6 | default $.Values.metaDataHttpProtocolIPv6 }} diff --git a/charts/karpenter_nodes/tests/nodeclass_test.yaml b/charts/karpenter_nodes/tests/nodeclass_test.yaml index 742f92e..393de8c 100644 --- a/charts/karpenter_nodes/tests/nodeclass_test.yaml +++ b/charts/karpenter_nodes/tests/nodeclass_test.yaml @@ -56,6 +56,13 @@ tests: - equal: path: spec.metadataOptions.httpPutResponseHopLimit value: 2 + - equal: + path: spec.detailedMonitoring + value: false + - equal: + path: spec.associatePublicIPAddress + value: false + - it: Verify nodes-workers documentIndex: 1 @@ -113,6 +120,11 @@ tests: path: spec.userData value: | echo "Nodepool name is nodes-workers" - + - equal: + path: spec.detailedMonitoring + value: true + - equal: + path: spec.associatePublicIPAddress + value: true diff --git a/charts/karpenter_nodes/tests/values.yaml b/charts/karpenter_nodes/tests/values.yaml index e6515f9..b950124 100644 --- a/charts/karpenter_nodes/tests/values.yaml +++ b/charts/karpenter_nodes/tests/values.yaml @@ -39,6 +39,8 @@ nodeGroups: iops: 3000 throughput: 125 IamInstanceProfile: "arn:aws:blablablabla" + detailedMonitoring: true + associatePublicIPAddress: true excludeFamilies: - m6a headRoom: diff --git a/charts/karpenter_nodes/values.yaml b/charts/karpenter_nodes/values.yaml index 8933ba0..291ad44 100644 --- a/charts/karpenter_nodes/values.yaml +++ b/charts/karpenter_nodes/values.yaml @@ -56,6 +56,8 @@ blockDeviceMappings: iops: 3000 throughput: 125 +detailedMonitoring: false +associatePublicIPAddress: false # Consolidation Options expireAfter: "720h" consolidationPolicy: "WhenUnderutilized" From 3add1fd182425f104879f617c3c38509f6b5d2fc Mon Sep 17 00:00:00 2001 From: nadavbuc Date: Tue, 9 Apr 2024 10:06:31 +0300 Subject: [PATCH 25/26] fix tests --- .github/workflows/tests.yml | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/.github/workflows/tests.yml 
b/.github/workflows/tests.yml index 9572379..b6cdf0e 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -3,7 +3,7 @@ name: Lint and Test on: pull_request jobs: - lint-and-version-check: + lint-and-version-check-then-tests: runs-on: ubuntu-latest steps: - name: Checkout @@ -35,9 +35,7 @@ jobs: - name: Run chart-testing (lint) if: steps.list-changed.outputs.changed == 'true' run: ct lint --target-branch ${{ github.event.repository.default_branch }} - unittest: - needs: lint-and-version-check #Add after first chart release - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: d3adb5/helm-unittest-action@v2 + + - name: Run chart-testing (unit) + uses: d3adb5/helm-unittest-action@v2 + if: steps.list-changed.outputs.changed == 'true' From 782f84cfb76bb587477e95cfdba12795237150a2 Mon Sep 17 00:00:00 2001 From: nadavbuc Date: Tue, 9 Apr 2024 10:08:37 +0300 Subject: [PATCH 26/26] try same helm ver --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index b6cdf0e..4975b78 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -14,7 +14,7 @@ jobs: - name: Set up Helm uses: azure/setup-helm@v3 with: - version: v3.12.1 + version: v3.13.3 - uses: actions/setup-python@v4 with: