Skip to content

Commit

Permalink
feat: fabric and gateway health probe
Browse files Browse the repository at this point in the history
  • Loading branch information
cheina97 committed Jan 10, 2025
1 parent 6171bec commit a719216
Show file tree
Hide file tree
Showing 11 changed files with 121 additions and 8 deletions.
9 changes: 9 additions & 0 deletions cmd/fabric/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/config"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/metrics/server"

Expand Down Expand Up @@ -142,6 +143,14 @@ func run(cmd *cobra.Command, _ []string) error {
return fmt.Errorf("unable to create manager: %w", err)
}

// Register the healthiness probes.
if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
return fmt.Errorf("unable to set up healthz probe: %w", err)
}
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
return fmt.Errorf("unable to set up readyz probe: %w", err)
}

gwr, err := sourcedetector.NewGatewayReconciler(
mgr.GetClient(),
mgr.GetScheme(),
Expand Down
9 changes: 9 additions & 0 deletions cmd/gateway/geneve/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"k8s.io/klog/v2"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client/config"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/metrics/server"

Expand Down Expand Up @@ -95,6 +96,14 @@ func run(cmd *cobra.Command, _ []string) error {
return fmt.Errorf("unable to create manager: %w", err)
}

// Register the healthiness probes.
if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
return fmt.Errorf("unable to set up healthz probe: %w", err)
}
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
return fmt.Errorf("unable to set up readyz probe: %w", err)
}

inr, err := geneve.NewInternalNodeReconciler(
mgr.GetClient(),
mgr.GetScheme(),
Expand Down
9 changes: 9 additions & 0 deletions cmd/gateway/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/config"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/metrics/server"

Expand Down Expand Up @@ -160,6 +161,14 @@ func run(cmd *cobra.Command, _ []string) error {
return fmt.Errorf("unable to create manager: %w", err)
}

// Register the healthiness probes.
if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
return fmt.Errorf("unable to set up healthz probe: %w", err)
}
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
return fmt.Errorf("unable to set up readyz probe: %w", err)
}

if connoptions.EnableConnectionController {
// Setup the connection controller.
connr, err := connection.NewConnectionsReconciler(
Expand Down
9 changes: 9 additions & 0 deletions cmd/gateway/wireguard/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/client/config"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/metrics"
"sigs.k8s.io/controller-runtime/pkg/metrics/server"
Expand Down Expand Up @@ -107,6 +108,14 @@ func run(cmd *cobra.Command, _ []string) error {
return fmt.Errorf("unable to create manager: %w", err)
}

// Register the healthiness probes.
if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
return fmt.Errorf("unable to set up healthz probe: %w", err)
}
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
return fmt.Errorf("unable to set up readyz probe: %w", err)
}

// Setup the controller.
pkr, err := wireguard.NewPublicKeysReconciler(
mgr.GetClient(),
Expand Down
2 changes: 2 additions & 0 deletions deployments/liqo/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@
| networking.enabled | bool | `true` | Use the default Liqo networking module. |
| networking.fabric.config.fullMasquerade | bool | `false` | Enable/Disable the full masquerade mode for the fabric pod. It means that all traffic will be masqueraded using the first external cidr IP, instead of using the pod IP. Full masquerade is useful when the cluster nodeports use a PodCIDR IP to masquerade the incoming traffic. IMPORTANT: Please consider that enabling this feature will masquerade the source IP of traffic towards a remote cluster, making it impossible for a pod that receives the traffic to know the original source IP. |
| networking.fabric.config.gatewayMasqueradeBypass | bool | `false` | Enable/Disable the masquerade bypass for the gateway pods. It means that the packets from gateway pods will not be masqueraded from the host where the pod is scheduled. This is useful in scenarios where CNIs masquerade the traffic from pod to nodes. For example this is required when using the Azure CNI or Kindnet. |
| networking.fabric.config.healthProbeBindAddressPort | string | `"8081"` | Set the port where the fabric pod will expose the health probe. To disable the health probe, set the port to 0. |
| networking.fabric.config.metricsAddressPort | string | `"8082"` | Set the port where the fabric pod will expose the metrics. To disable the metrics, set the port to 0. |
| networking.fabric.config.nftablesMonitor | bool | `true` | Enable/Disable the nftables monitor for the fabric pod. It means that the fabric pod will monitor the nftables rules and will restore them in case of changes. In some cases (like K3S), this monitor can cause a huge amount of CPU usage. If you are experiencing high CPU usage, you can disable this feature. |
| networking.fabric.image.name | string | `"ghcr.io/liqotech/fabric"` | Image repository for the fabric pod. |
| networking.fabric.image.version | string | `""` | Custom version for the fabric image. If not specified, the global tag is used. |
Expand Down
12 changes: 12 additions & 0 deletions deployments/liqo/templates/liqo-fabric-daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ spec:
- --podname=$(POD_NAME)
- --nodename=$(NODE_NAME)
- --geneve-port={{ .Values.networking.genevePort }}
- --health-probe-bind-address=:{{ .Values.networking.fabric.config.healthProbeBindAddressPort}}
- --metrics-address=:{{ .Values.networking.fabric.config.metricsAddressPort}}
{{- if not .Values.requirements.kernel.enabled }}
- --disable-kernel-version-check
{{- end }}
Expand Down Expand Up @@ -79,6 +81,16 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.name
{{- if and .Values.networking.fabric.config.healthProbeBindAddressPort (ne .Values.networking.fabric.config.healthProbeBindAddressPort "0") }}
ports:
- name: healthz
containerPort: {{ .Values.networking.fabric.config.healthProbeBindAddressPort }}
protocol: TCP
readinessProbe:
httpGet:
path: /readyz
port: healthz
{{- end }}
hostNetwork: true
{{- if .Values.networking.fabric.pod.priorityClassName }}
priorityClassName: {{ .Values.networking.fabric.pod.priorityClassName }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ spec:
ports:
- containerPort: 8082
name: gw-metrics
- containerPort: 8083
name: healthz
{{- end }}
env:
- name: NODE_NAME
Expand All @@ -94,6 +96,10 @@ spec:
add:
- NET_ADMIN
- NET_RAW
readinessProbe:
httpGet:
path: /readyz
port: healthz
- name: wireguard
image: {{ .Values.networking.gatewayTemplates.container.wireguard.image.name }}{{ include "liqo.suffix" $wireguardConfig }}:{{ include "liqo.version" $wireguardConfig }}
imagePullPolicy: {{ .Values.pullPolicy }}
Expand All @@ -116,6 +122,8 @@ spec:
ports:
- containerPort: 8084
name: wg-metrics
- containerPort: 8085
name: healthz
{{- end }}
securityContext:
capabilities:
Expand All @@ -130,6 +138,10 @@ spec:
mountPath: /ipc
- name: wireguard-config
mountPath: /etc/wireguard/keys
readinessProbe:
httpGet:
path: /readyz
port: healthz
- name: geneve
image: {{ .Values.networking.gatewayTemplates.container.geneve.image.name }}{{ include "liqo.suffix" $geneveConfig }}:{{ include "liqo.version" $geneveConfig }}
imagePullPolicy: {{ .Values.pullPolicy }}
Expand All @@ -154,6 +166,8 @@ spec:
ports:
- containerPort: 8086
name: gv-metrics
- containerPort: 8087
name: healthz
{{- end }}
env:
- name: NODE_NAME
Expand All @@ -169,6 +183,10 @@ spec:
add:
- NET_ADMIN
- NET_RAW
readinessProbe:
httpGet:
path: /readyz
port: healthz
# Uncomment to set a priorityClassName
# priorityClassName: ""
volumes:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,13 @@ spec:
- containerPort: 8082
name: gw-metrics
{{- end }}
ports:
- containerPort: 8083
name: healthz
readinessProbe:
httpGet:
path: /readyz
port: healthz
env:
- name: NODE_NAME
valueFrom:
Expand Down Expand Up @@ -143,6 +150,13 @@ spec:
- containerPort: 8084
name: wg-metrics
{{- end }}
ports:
- containerPort: 8085
name: healthz
readinessProbe:
httpGet:
path: /readyz
port: healthz
securityContext:
capabilities:
add:
Expand Down Expand Up @@ -172,7 +186,7 @@ spec:
{{- if .Values.metrics.enabled }}
- --metrics-address=:8086
{{- end }}
- --health-probe-bind-address=:8086
- --health-probe-bind-address=:8087
volumeMounts:
- name: ipc
mountPath: /ipc
Expand All @@ -181,6 +195,13 @@ spec:
- containerPort: 8086
name: gv-metrics
{{- end }}
ports:
- containerPort: 8087
name: healthz
readinessProbe:
httpGet:
path: /readyz
port: healthz
env:
- name: NODE_NAME
valueFrom:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@ spec:
{{- include "liqo.concatenateMap" $d | nindent 16 }}
{{- end }}
{{- if .Values.metrics.enabled }}
- --metrics-address=:8084
- --metrics-address=:8082
{{- end }}
- --health-probe-bind-address=:8085
- --health-probe-bind-address=:8083
- --ping-enabled=true
- --ping-loss-threshold={{ .Values.networking.gatewayTemplates.ping.lossThreshold }}
- --ping-interval={{ .Values.networking.gatewayTemplates.ping.interval }}
Expand All @@ -96,9 +96,15 @@ spec:
mountPath: /ipc
{{- if .Values.metrics.enabled }}
ports:
- containerPort: 8084
- containerPort: 8082
name: gw-metrics
- containerPort: 8083
name: healthz
{{- end }}
readinessProbe:
httpGet:
path: /readyz
port: healthz
env:
- name: NODE_NAME
valueFrom:
Expand Down Expand Up @@ -127,15 +133,21 @@ spec:
- --mtu={{"{{ .Spec.MTU }}"}}
- --listen-port={{"{{ .Spec.Endpoint.Port }}"}}
{{- if .Values.metrics.enabled }}
- --metrics-address=:8082
- --metrics-address=:8084
{{- end }}
- --health-probe-bind-address=:8083
- --health-probe-bind-address=:8085
- --implementation={{ .Values.networking.gatewayTemplates.wireguard.implementation }}
{{- if .Values.metrics.enabled }}
ports:
- containerPort: 8082
- containerPort: 8084
name: wg-metrics
- containerPort: 8085
name: healthz
{{- end }}
readinessProbe:
httpGet:
path: /readyz
port: healthz
securityContext:
capabilities:
add:
Expand Down Expand Up @@ -173,7 +185,13 @@ spec:
ports:
- containerPort: 8086
name: gv-metrics
- containerPort: 8087
name: healthz
{{- end }}
readinessProbe:
httpGet:
path: /readyz
port: healthz
env:
- name: NODE_NAME
valueFrom:
Expand Down
6 changes: 6 additions & 0 deletions deployments/liqo/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,12 @@ networking:
# In some cases (like K3S), this monitor can cause a huge amount of CPU usage.
# If you are experiencing high CPU usage, you can disable this feature.
nftablesMonitor: true
# -- Set the port where the fabric pod will expose the health probe.
# To disable the health probe, set the port to 0.
healthProbeBindAddressPort: "8081"
# -- Set the port where the fabric pod will expose the metrics.
# To disable the metrics, set the port to 0.
metricsAddressPort: "8082"

authentication:
# -- Enable/Disable the authentication module.
Expand Down
2 changes: 1 addition & 1 deletion pkg/gateway/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ func InitFlags(flagset *pflag.FlagSet, opts *Options) {
"RetryPeriod for the leader election")

flagset.StringVar(&opts.MetricsAddress, FlagNameMetricsAddress.String(), "0", "Address for the metrics endpoint")
flagset.StringVar(&opts.ProbeAddr, FlagNameProbeAddr.String(), ":8081", "Address for the health probe endpoint")
flagset.StringVar(&opts.ProbeAddr, FlagNameProbeAddr.String(), "0", "Address for the health probe endpoint")

flagset.BoolVar(&opts.DisableKernelVersionCheck, FlagNameDisableKernelVersionCheck.String(), false, "Disable the kernel version check")
flagset.Var(&opts.MinimumKernelVersion, FlagNameMinimumKernelVersion.String(), "Minimum kernel version required by Liqo")
Expand Down

0 comments on commit a719216

Please sign in to comment.