diff --git a/cmd/fabric/main.go b/cmd/fabric/main.go index 7c68ef4d86..995c8e071e 100644 --- a/cmd/fabric/main.go +++ b/cmd/fabric/main.go @@ -31,6 +31,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/cache" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/config" + "sigs.k8s.io/controller-runtime/pkg/healthz" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/metrics/server" @@ -142,6 +143,14 @@ func run(cmd *cobra.Command, _ []string) error { return fmt.Errorf("unable to create manager: %w", err) } + // Register the liveness (healthz) and readiness (readyz) probes. + if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { + return fmt.Errorf("unable to set up healthz probe: %w", err) + } + if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { + return fmt.Errorf("unable to set up readyz probe: %w", err) + } + gwr, err := sourcedetector.NewGatewayReconciler( mgr.GetClient(), mgr.GetScheme(), diff --git a/cmd/gateway/geneve/main.go b/cmd/gateway/geneve/main.go index e8408b3925..736f6df1e8 100644 --- a/cmd/gateway/geneve/main.go +++ b/cmd/gateway/geneve/main.go @@ -25,6 +25,7 @@ import ( "k8s.io/klog/v2" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client/config" + "sigs.k8s.io/controller-runtime/pkg/healthz" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/metrics/server" @@ -95,6 +96,14 @@ func run(cmd *cobra.Command, _ []string) error { return fmt.Errorf("unable to create manager: %w", err) } + // Register the liveness (healthz) and readiness (readyz) probes. 
+ if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { + return fmt.Errorf("unable to set up healthz probe: %w", err) + } + if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { + return fmt.Errorf("unable to set up readyz probe: %w", err) + } + inr, err := geneve.NewInternalNodeReconciler( mgr.GetClient(), mgr.GetScheme(), diff --git a/cmd/gateway/main.go b/cmd/gateway/main.go index 950f8118ac..eeb23c2bd0 100644 --- a/cmd/gateway/main.go +++ b/cmd/gateway/main.go @@ -29,6 +29,7 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/config" + "sigs.k8s.io/controller-runtime/pkg/healthz" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/metrics/server" @@ -160,6 +161,14 @@ func run(cmd *cobra.Command, _ []string) error { return fmt.Errorf("unable to create manager: %w", err) } + // Register the liveness (healthz) and readiness (readyz) probes. + if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { + return fmt.Errorf("unable to set up healthz probe: %w", err) + } + if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { + return fmt.Errorf("unable to set up readyz probe: %w", err) + } + if connoptions.EnableConnectionController { // Setup the connection controller. 
connr, err := connection.NewConnectionsReconciler( diff --git a/cmd/gateway/wireguard/main.go b/cmd/gateway/wireguard/main.go index 59606401ed..248ea40bea 100644 --- a/cmd/gateway/wireguard/main.go +++ b/cmd/gateway/wireguard/main.go @@ -29,6 +29,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/cache" "sigs.k8s.io/controller-runtime/pkg/client/config" "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/healthz" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/metrics" "sigs.k8s.io/controller-runtime/pkg/metrics/server" @@ -107,6 +108,14 @@ func run(cmd *cobra.Command, _ []string) error { return fmt.Errorf("unable to create manager: %w", err) } + // Register the liveness (healthz) and readiness (readyz) probes. + if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { + return fmt.Errorf("unable to set up healthz probe: %w", err) + } + if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { + return fmt.Errorf("unable to set up readyz probe: %w", err) + } + // Setup the controller. pkr, err := wireguard.NewPublicKeysReconciler( mgr.GetClient(), diff --git a/deployments/liqo/README.md b/deployments/liqo/README.md index 6673be1ab1..723f78b004 100644 --- a/deployments/liqo/README.md +++ b/deployments/liqo/README.md @@ -86,6 +86,8 @@ | networking.enabled | bool | `true` | Use the default Liqo networking module. | | networking.fabric.config.fullMasquerade | bool | `false` | Enabe/Disable the full masquerade mode for the fabric pod. It means that all traffic will be masquerade using the first external cidr IP, instead of using the pod IP. Full masquerade is useful when the cluster nodeports uses a PodCIDR IP to masqerade the incoming traffic. IMPORTANT: Please consider that enabling this feature will masquerade the source IP of traffic towards a remote cluster, making impossible for a pod that receives the traffic to know the original source IP. 
| | networking.fabric.config.gatewayMasqueradeBypass | bool | `false` | Enable/Disable the masquerade bypass for the gateway pods. It means that the packets from gateway pods will not be masqueraded from the host where the pod is scheduled. This is useful in scenarios where CNIs masquerade the traffic from pod to nodes. For example this is required when using the Azure CNI or Kindnet. | +| networking.fabric.config.healthProbeBindAddressPort | string | `"8081"` | Set the port where the fabric pod will expose the health probe. To disable the health probe, set the port to 0. | +| networking.fabric.config.metricsAddressPort | string | `"8082"` | Set the port where the fabric pod will expose the metrics. To disable the metrics, set the port to 0. | | networking.fabric.config.nftablesMonitor | bool | `true` | Enable/Disable the nftables monitor for the fabric pod. It means that the fabric pod will monitor the nftables rules and will restore them in case of changes. In some cases (like K3S), this monitor can cause a huge amount of CPU usage. If you are experiencing high CPU usage, you can disable this feature. | | networking.fabric.image.name | string | `"ghcr.io/liqotech/fabric"` | Image repository for the fabric pod. | | networking.fabric.image.version | string | `""` | Custom version for the fabric image. If not specified, the global tag is used. 
| diff --git a/deployments/liqo/templates/liqo-fabric-daemonset.yaml b/deployments/liqo/templates/liqo-fabric-daemonset.yaml index a5da07bc8d..ae47210bc3 100644 --- a/deployments/liqo/templates/liqo-fabric-daemonset.yaml +++ b/deployments/liqo/templates/liqo-fabric-daemonset.yaml @@ -45,6 +45,8 @@ spec: - --podname=$(POD_NAME) - --nodename=$(NODE_NAME) - --geneve-port={{ .Values.networking.genevePort }} + - --health-probe-bind-address=:{{ .Values.networking.fabric.config.healthProbeBindAddressPort}} + - --metrics-address=:{{ .Values.networking.fabric.config.metricsAddressPort}} {{- if not .Values.requirements.kernel.enabled }} - --disable-kernel-version-check {{- end }} @@ -79,6 +81,16 @@ spec: valueFrom: fieldRef: fieldPath: metadata.name + {{- if and .Values.networking.fabric.config.healthProbeBindAddressPort (ne .Values.networking.fabric.config.healthProbeBindAddressPort "0") }} + ports: + - name: healthz + containerPort: {{ .Values.networking.fabric.config.healthProbeBindAddressPort }} + protocol: TCP + readinessProbe: + httpGet: + path: /readyz + port: healthz + {{- end }} hostNetwork: true {{- if .Values.networking.fabric.pod.priorityClassName }} priorityClassName: {{ .Values.networking.fabric.pod.priorityClassName }} diff --git a/deployments/liqo/templates/liqo-wireguard-gateway-client-template.yaml b/deployments/liqo/templates/liqo-wireguard-gateway-client-template.yaml index acbf2a3da9..e0aabd4527 100644 --- a/deployments/liqo/templates/liqo-wireguard-gateway-client-template.yaml +++ b/deployments/liqo/templates/liqo-wireguard-gateway-client-template.yaml @@ -79,6 +79,14 @@ spec: - containerPort: 8082 name: gw-metrics {{- end }} + - containerPort: 8083 + name: healthz + # ATTENTION: uncomment the readinessProbe section if you are aware of the consequences. + # If you have more replicas of the same gateway, the passive ones will not reach the ready state. 
+ #readinessProbe: + # httpGet: + # path: /readyz + # port: healthz env: - name: NODE_NAME valueFrom: @@ -117,6 +125,14 @@ spec: - containerPort: 8084 name: wg-metrics {{- end }} + - containerPort: 8085 + name: healthz + # ATTENTION: uncomment the readinessProbe section if you are aware of the consequences. + # If you have more replicas of the same gateway, the passive ones will not reach the ready state. + #readinessProbe: + # httpGet: + # path: /readyz + # port: healthz securityContext: capabilities: add: @@ -155,6 +171,14 @@ spec: - containerPort: 8086 name: gv-metrics {{- end }} + - containerPort: 8087 + name: healthz + # ATTENTION: uncomment the readinessProbe section if you are aware of the consequences. + # If you have more replicas of the same gateway, the passive ones will not reach the ready state. + #readinessProbe: + # httpGet: + # path: /readyz + # port: healthz env: - name: NODE_NAME valueFrom: diff --git a/deployments/liqo/templates/liqo-wireguard-gateway-server-template-eks.yaml b/deployments/liqo/templates/liqo-wireguard-gateway-server-template-eks.yaml index a08b51654e..f2ff0c73af 100644 --- a/deployments/liqo/templates/liqo-wireguard-gateway-server-template-eks.yaml +++ b/deployments/liqo/templates/liqo-wireguard-gateway-server-template-eks.yaml @@ -106,6 +106,15 @@ spec: - containerPort: 8082 name: gw-metrics {{- end }} + ports: + - containerPort: 8083 + name: healthz + # ATTENTION: uncomment the readinessProbe section if you are aware of the consequences. + # If you have more replicas of the same gateway, the passive ones will not reach the ready state. + #readinessProbe: + # httpGet: + # path: /readyz + # port: healthz env: - name: NODE_NAME valueFrom: @@ -143,6 +152,15 @@ spec: - containerPort: 8084 name: wg-metrics {{- end }} + ports: + - containerPort: 8085 + name: healthz + # ATTENTION: uncomment the readinessProbe section if you are aware of the consequences. 
+ # If you have more replicas of the same gateway, the passive ones will not reach the ready state. + #readinessProbe: + # httpGet: + # path: /readyz + # port: healthz securityContext: capabilities: add: @@ -172,7 +190,7 @@ spec: {{- if .Values.metrics.enabled }} - --metrics-address=:8086 {{- end }} - - --health-probe-bind-address=:8086 + - --health-probe-bind-address=:8087 volumeMounts: - name: ipc mountPath: /ipc @@ -181,6 +199,15 @@ spec: - containerPort: 8086 name: gv-metrics {{- end }} + ports: + - containerPort: 8087 + name: healthz + # ATTENTION: uncomment the readinessProbe section if you are aware of the consequences. + # If you have more replicas of the same gateway, the passive ones will not reach the ready state. + #readinessProbe: + # httpGet: + # path: /readyz + # port: healthz env: - name: NODE_NAME valueFrom: diff --git a/deployments/liqo/templates/liqo-wireguard-gateway-server-template.yaml b/deployments/liqo/templates/liqo-wireguard-gateway-server-template.yaml index d4a248548e..acdc1291c1 100644 --- a/deployments/liqo/templates/liqo-wireguard-gateway-server-template.yaml +++ b/deployments/liqo/templates/liqo-wireguard-gateway-server-template.yaml @@ -80,9 +80,9 @@ spec: {{- include "liqo.concatenateMap" $d | nindent 16 }} {{- end }} {{- if .Values.metrics.enabled }} - - --metrics-address=:8084 + - --metrics-address=:8082 {{- end }} - - --health-probe-bind-address=:8085 + - --health-probe-bind-address=:8083 - --ping-enabled=true - --ping-loss-threshold={{ .Values.networking.gatewayTemplates.ping.lossThreshold }} - --ping-interval={{ .Values.networking.gatewayTemplates.ping.interval }} @@ -96,9 +96,17 @@ spec: mountPath: /ipc {{- if .Values.metrics.enabled }} ports: - - containerPort: 8084 + - containerPort: 8082 name: gw-metrics {{- end }} + - containerPort: 8083 + name: healthz + # ATTENTION: uncomment the readinessProbe section if you are aware of the consequences. 
+ # If you have more replicas of the same gateway, the passive ones will not reach the ready state. + #readinessProbe: + # httpGet: + # path: /readyz + # port: healthz env: - name: NODE_NAME valueFrom: @@ -127,15 +135,23 @@ spec: - --mtu={{"{{ .Spec.MTU }}"}} - --listen-port={{"{{ .Spec.Endpoint.Port }}"}} {{- if .Values.metrics.enabled }} - - --metrics-address=:8082 + - --metrics-address=:8084 {{- end }} - - --health-probe-bind-address=:8083 + - --health-probe-bind-address=:8085 - --implementation={{ .Values.networking.gatewayTemplates.wireguard.implementation }} {{- if .Values.metrics.enabled }} ports: - - containerPort: 8082 + - containerPort: 8084 name: wg-metrics {{- end }} + - containerPort: 8085 + name: healthz + # ATTENTION: uncomment the readinessProbe section if you are aware of the consequences. + # If you have more replicas of the same gateway, the passive ones will not reach the ready state. + #readinessProbe: + # httpGet: + # path: /readyz + # port: healthz securityContext: capabilities: add: @@ -174,6 +190,14 @@ spec: - containerPort: 8086 name: gv-metrics {{- end }} + - containerPort: 8087 + name: healthz + # ATTENTION: uncomment the readinessProbe section if you are aware of the consequences. + # If you have more replicas of the same gateway, the passive ones will not reach the ready state. + #readinessProbe: + # httpGet: + # path: /readyz + # port: healthz env: - name: NODE_NAME valueFrom: diff --git a/deployments/liqo/values.yaml b/deployments/liqo/values.yaml index 36da845086..cbf2b32935 100644 --- a/deployments/liqo/values.yaml +++ b/deployments/liqo/values.yaml @@ -124,6 +124,12 @@ networking: # In some cases (like K3S), this monitor can cause a huge amount of CPU usage. # If you are experiencing high CPU usage, you can disable this feature. nftablesMonitor: true + # -- Set the port where the fabric pod will expose the health probe. + # To disable the health probe, set the port to 0. 
+ healthProbeBindAddressPort: "8081" + # -- Set the port where the fabric pod will expose the metrics. + # To disable the metrics, set the port to 0. + metricsAddressPort: "8082" authentication: # -- Enable/Disable the authentication module. diff --git a/pkg/gateway/flags.go b/pkg/gateway/flags.go index 46f670f3e2..e3ba3ae8c8 100644 --- a/pkg/gateway/flags.go +++ b/pkg/gateway/flags.go @@ -108,7 +108,7 @@ func InitFlags(flagset *pflag.FlagSet, opts *Options) { "RetryPeriod for the leader election") flagset.StringVar(&opts.MetricsAddress, FlagNameMetricsAddress.String(), "0", "Address for the metrics endpoint") - flagset.StringVar(&opts.ProbeAddr, FlagNameProbeAddr.String(), ":8081", "Address for the health probe endpoint") + flagset.StringVar(&opts.ProbeAddr, FlagNameProbeAddr.String(), "0", "Address for the health probe endpoint") flagset.BoolVar(&opts.DisableKernelVersionCheck, FlagNameDisableKernelVersionCheck.String(), false, "Disable the kernel version check") flagset.Var(&opts.MinimumKernelVersion, FlagNameMinimumKernelVersion.String(), "Minimum kernel version required by Liqo")