Skip to content

Commit

Permalink
Merge branch 'main' into feat/vethwatcher
Browse files Browse the repository at this point in the history
  • Loading branch information
nddq authored Jan 24, 2025
2 parents 7c7c763 + af88054 commit 0732e6a
Show file tree
Hide file tree
Showing 107 changed files with 6,499 additions and 460 deletions.
13 changes: 12 additions & 1 deletion .github/workflows/release-charts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,20 @@ jobs:
run: |
set -euo pipefail
export TAG=$(make version)
helm package ./deploy/legacy/manifests/controller/helm/retina --version $TAG
helm package ./deploy/standard/manifests/controller/helm/retina --version $TAG
# Get Helm chart's SHA digest from helm push cmd output
helm push retina-$TAG.tgz oci://ghcr.io/${{ github.repository }}/charts >> helm_push_result.txt 2>&1
cat helm_push_result.txt
cosign sign --yes ghcr.io/${{ github.repository }}/charts/retina@$(tail -n 1 helm_push_result.txt | awk '{ print $2 }')
- name: Build, Push and Sign Hubble chart
id: build_hubble_chart
shell: bash
run: |
set -euo pipefail
export TAG=$(make version)
helm package ./deploy/hubble/manifests/controller/helm/retina --version $TAG
# Get Helm chart's SHA digest from helm push cmd output
helm push retina-hubble-$TAG.tgz oci://ghcr.io/${{ github.repository }}/charts >> helm_push_hubble.txt 2>&1
cat helm_push_hubble.txt
cosign sign --yes ghcr.io/${{ github.repository }}/charts/retina-hubble@$(tail -n 1 helm_push_hubble.txt | awk '{ print $2 }')
5 changes: 3 additions & 2 deletions .github/workflows/scale-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,12 @@ jobs:
NUM_REPLICAS: ${{ inputs.num_replicas }}
NUM_NETPOLS: ${{ inputs.num_netpol }}
CLEANUP: ${{ inputs.cleanup }}
IMAGE_REGISTRY: ${{ inputs.image_namespace == '' && vars.ACR_NAME || inputs.image_namespace }}
IMAGE_REGISTRY: ${{ vars.ACR_NAME }}
IMAGE_NAMESPACE: ${{ github.repository }}
TAG: ${{ inputs.image_tag }}
AZURE_APP_INSIGHTS_KEY: ${{ secrets.AZURE_APP_INSIGHTS_KEY }}
shell: bash
run: |
set -euo pipefail
go test -v ./test/e2e/. -timeout 300m -tags=scale -count=1 -args -image-tag=$( [[ $TAG == "" ]] && make version || echo $TAG ) -create-infra=false -delete-infra=false
[[ $TAG == "" ]] && TAG=$(make version)
go test -v ./test/e2e/. -timeout 300m -tags=scale -count=1 -args -create-infra=false -delete-infra=false
5 changes: 4 additions & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ on:
pull_request:
branches: [main]
workflow_dispatch:

permissions:
actions: read
contents: read
Expand All @@ -15,6 +16,7 @@ permissions:
pull-requests: write
security-events: write
issues: write

jobs:
test-image:
runs-on: ubuntu-latest
Expand All @@ -32,8 +34,9 @@ jobs:
PULL_REQUEST_NUMBER: ${{ github.event.pull_request.number }}
run: |
make test-image IMAGE_NAMESPACE=${{ github.repository }} PLATFORM=linux/amd64
- name: Upload Artifacts
uses: actions/upload-artifact@v4
with:
name: coverage-files
path: ./coverage*
path: ./artifacts/coverage*
11 changes: 7 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ ifndef TAG
TAG ?= $(shell git describe --tags --always)
endif
OUTPUT_DIR = $(REPO_ROOT)/output
ARTIFACTS_DIR = $(REPO_ROOT)/artifacts
BUILD_DIR = $(OUTPUT_DIR)/$(GOOS)_$(GOARCH)
RETINA_BUILD_DIR = $(BUILD_DIR)/retina
RETINA_DIR = $(REPO_ROOT)/controller
Expand Down Expand Up @@ -241,6 +242,7 @@ container-docker: buildx # util target to build container images using docker bu
image_metadata_filename="image-metadata-$$image_name-$(TAG).json"; \
touch $$image_metadata_filename; \
echo "Building $$image_name for $$os/$$arch "; \
mkdir -p $(ARTIFACTS_DIR); \
docker buildx build \
--platform $(PLATFORM) \
--metadata-file=$$image_metadata_filename \
Expand All @@ -253,6 +255,7 @@ container-docker: buildx # util target to build container images using docker bu
--build-arg VERSION=$(VERSION) $(EXTRA_BUILD_ARGS) \
--target=$(TARGET) \
-t $(IMAGE_REGISTRY)/$(IMAGE):$(TAG) \
--output type=local,dest=$(ARTIFACTS_DIR) \
$(BUILDX_ACTION) \
$(CONTEXT_DIR)

Expand Down Expand Up @@ -441,7 +444,7 @@ HELM_IMAGE_TAG ?= $(LATEST_TAG)

# basic/node-level mode
helm-install: manifests
helm upgrade --install retina ./deploy/legacy/manifests/controller/helm/retina/ \
helm upgrade --install retina ./deploy/standard/manifests/controller/helm/retina/ \
--namespace kube-system \
--set image.repository=$(IMAGE_REGISTRY)/$(RETINA_IMAGE) \
--set image.initRepository=$(IMAGE_REGISTRY)/$(RETINA_INIT_IMAGE) \
Expand All @@ -454,7 +457,7 @@ helm-install: manifests
--set enabledPlugin_linux="\[dropreason\,packetforward\,linuxutil\,dns\]"

helm-install-with-operator: manifests
helm upgrade --install retina ./deploy/legacy/manifests/controller/helm/retina/ \
helm upgrade --install retina ./deploy/standard/manifests/controller/helm/retina/ \
--namespace kube-system \
--set image.repository=$(IMAGE_REGISTRY)/$(RETINA_IMAGE) \
--set image.initRepository=$(IMAGE_REGISTRY)/$(RETINA_INIT_IMAGE) \
Expand All @@ -471,7 +474,7 @@ helm-install-with-operator: manifests

# advanced/pod-level mode with scale limitations, where metrics are aggregated by source and destination Pod
helm-install-advanced-remote-context: manifests
helm upgrade --install retina ./deploy/legacy/manifests/controller/helm/retina/ \
helm upgrade --install retina ./deploy/standard/manifests/controller/helm/retina/ \
--namespace kube-system \
--set image.repository=$(IMAGE_REGISTRY)/$(RETINA_IMAGE) \
--set image.initRepository=$(IMAGE_REGISTRY)/$(RETINA_INIT_IMAGE) \
Expand All @@ -490,7 +493,7 @@ helm-install-advanced-remote-context: manifests

# advanced/pod-level mode designed for scale, where metrics are aggregated by "local" Pod (source for outgoing traffic, destination for incoming traffic)
helm-install-advanced-local-context: manifests
helm upgrade --install retina ./deploy/legacy/manifests/controller/helm/retina/ \
helm upgrade --install retina ./deploy/standard/manifests/controller/helm/retina/ \
--namespace kube-system \
--set image.repository=$(IMAGE_REGISTRY)/$(RETINA_IMAGE) \
--set image.initRepository=$(IMAGE_REGISTRY)/$(RETINA_INIT_IMAGE) \
Expand Down
4 changes: 2 additions & 2 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (
"fmt"
"os"

"github.com/microsoft/retina/cmd/legacy"
"github.com/microsoft/retina/cmd/standard"
"github.com/spf13/cobra"
)

Expand All @@ -28,7 +28,7 @@ var (
RunE: func(cmd *cobra.Command, args []string) error {
// Do Stuff Here
fmt.Println("Starting Retina Agent")
d := legacy.NewDaemon(metricsAddr, probeAddr, cfgFile, enableLeaderElection)
d := standard.NewDaemon(metricsAddr, probeAddr, cfgFile, enableLeaderElection)
if err := d.Start(); err != nil {
return fmt.Errorf("starting daemon: %w", err)
}
Expand Down
2 changes: 1 addition & 1 deletion cmd/legacy/daemon.go → cmd/standard/daemon.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
package legacy
package standard

import (
"fmt"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package legacy
package standard

import "github.com/cilium/ebpf/rlimit"

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package legacy
package standard

func (d *Daemon) RemoveMemlock() error {
// This function is a no-op on Windows.
Expand Down
2 changes: 1 addition & 1 deletion crd/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ REPO_ROOT = $(shell git rev-parse --show-toplevel)
TOOLS_DIR = $(REPO_ROOT)/hack/tools
TOOLS_BIN_DIR = $(TOOLS_DIR)/bin
CONTROLLER_GEN = $(TOOLS_BIN_DIR)/controller-gen
HELM_CRD_DIR = $(REPO_ROOT)/deploy/legacy/manifests/controller/helm/retina/crds
HELM_CRD_DIR = $(REPO_ROOT)/deploy/standard/manifests/controller/helm/retina/crds

.PHONY: generate manifests

Expand Down
2 changes: 1 addition & 1 deletion deploy/hubble/manifests/controller/helm/retina/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
apiVersion: v2
name: retina
name: retina-hubble
description: A Helm chart for Retina Network Observability in Kubernetes with dependencies

# A chart can be either an 'application' or a 'library' chart.
Expand Down
3 changes: 0 additions & 3 deletions deploy/legacy/prometheus/retina/create-cm.sh

This file was deleted.

File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
1. Installing retina service using helm: helm install retina ./deploy/legacy/manifests/controller/helm/retina/ --namespace kube-system --dependency-update
1. Installing retina service using helm: helm install retina ./deploy/standard/manifests/controller/helm/retina/ --namespace kube-system --dependency-update
2. Cleaning up/uninstalling/deleting retina and dependencies related:
helm uninstall retina -n kube-system
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash

kubectl delete cm ama-metrics-prometheus-config-node -n kube-system
kubectl create configmap ama-metrics-prometheus-config-node --from-file=./deploy/legacy/prometheus/cilium/prometheus-config -n kube-system
kubectl create configmap ama-metrics-prometheus-config-node --from-file=./deploy/standard/prometheus/cilium/prometheus-config -n kube-system
# Call kubectl by its full name: bash does not expand aliases such as `k` in
# non-interactive scripts, so the short form fails with "command not found".
kubectl rollout restart ds ama-metrics-node -n kube-system
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@

kubectl delete cm ama-metrics-prometheus-config-node -n kube-system

kubectl create configmap ama-metrics-prometheus-config-node --from-file=./deploy/legacy/prometheus/retina-windows/prometheus-config -n kube-system
kubectl create configmap ama-metrics-prometheus-config-node --from-file=./deploy/standard/prometheus/retina-windows/prometheus-config -n kube-system
3 changes: 3 additions & 0 deletions deploy/standard/prometheus/retina/create-cm.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash

# Create the ama-metrics-prometheus-config-node ConfigMap in the kube-system
# namespace from the Retina Prometheus scrape config file.
# The --from-file path is relative to the current working directory
# (presumably the repository root — confirm before running elsewhere).
kubectl create configmap ama-metrics-prometheus-config-node \
  --from-file=./deploy/standard/prometheus/retina/prometheus-config \
  -n kube-system
File renamed without changes.
File renamed without changes.
File renamed without changes.
80 changes: 0 additions & 80 deletions docs/01-Intro.md

This file was deleted.

96 changes: 96 additions & 0 deletions docs/01-Introduction/01-intro.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# What is Retina?

## Introduction

Retina is a cloud-agnostic, open-source **Kubernetes Network Observability platform** which enables the use of Hubble as a control plane regardless of the underlying OS or CNI.

Retina can help with DevOps, SecOps and compliance use cases.

It provides a **centralized hub for monitoring application and network health and security**, catering to Cluster Network/Security Administrators and DevOps Engineers.

Retina **collects customizable telemetry**, which can be exported to **multiple storage options** (such as Prometheus, Azure Monitor, etc.) and **visualized in a variety of ways** (like Grafana, Azure Log Analytics, etc.).

![High Level Architecture](./img/Retina%20Arch.png "High Level Architecture")

## Features

- **[eBPF](https://ebpf.io/what-is-ebpf#what-is-ebpf) based** - Leverages eBPF technologies to collect and provide insights into your Kubernetes cluster with minimal overhead.
- **Platform Agnostic** - Works with any Cloud or On-Prem Kubernetes distribution and supports multiple operating systems such as Linux, Windows, Azure Linux, etc.
- **CNI Agnostic** - Works with any Container Network Interface (CNI) plugin, such as Azure CNI, AWS VPC, etc.
- **Actionable Metrics** - Provides industry-standard Prometheus metrics.
- **Hubble Integration** - Integrates with Cilium's Hubble for additional network insights such as flow logs, DNS, etc.
- **Packet Capture** - Distributed packet captures for deep-dive troubleshooting.

## Why Retina?

Retina lets you **investigate network issues on-demand** and **continuously monitor your clusters**. Here are a couple of scenarios where Retina shines, minimizing pain points and investigation time.

### Use Case - Debugging Network Connectivity

*Why can't my Pods connect to each other any more?*

**Typical investigation is time-intensive** and involves manually performing packet captures, where one must first identify the Nodes involved, gain access to each Node, run `tcpdump` commands, and export the results off of each Node.

With Retina, you can **automate this process** with a **single CLI command** or CRD/YAML that can:

- Run captures on all Nodes hosting the Pods of interest.
- Upload each Node's results to a storage blob.

To begin using the CLI, see [Quick Start Installation](../02-Installation/02-CLI.md).

### Use Case - Monitoring Network Health

Retina supports actionable insights through **Prometheus** alerting, **Grafana** dashboards, and more. For instance, you can:

- Monitor dropped traffic in a namespace.
- Alert on a spike in production DNS errors.
- Watch changes in API Server latency while testing your application's scale.
- Notify your Security team if a Pod starts sending too much traffic.

## Telemetry

Retina uses two types of telemetry: metrics and captures.

### Metrics

Retina metrics provide **continuous observability** into:

- Incoming/outgoing traffic
- Dropped packets
- TCP/UDP
- DNS
- API Server latency
- Node/interface statistics

Retina provides both:

- **Basic metrics** - Node-Level (default)
- **Advanced metrics** - Pod-Level (if enabled)

For more info and a list of metrics, see [Metrics](../03-Metrics/modes/modes.md).

The same set of metrics is generated regardless of the underlying OS or CNI.

### Captures

A Retina capture **logs network traffic** and metadata **for the specified Nodes/Pods**.

Captures are **on-demand** and can be output to multiple destinations. For more info, see [Captures](../04-Captures/01-overview.md).

## What is Hubble?

Hubble is a fully distributed networking and security observability platform designed for cloud-native workloads. It’s built on top of [Cilium](https://cilium.io/get-started/) and [eBPF](https://ebpf.io/what-is-ebpf/), which allows it to provide deep visibility into the communication and behavior of services and the networking infrastructure.

You can read the official documentation here - [What is Hubble?](https://docs.cilium.io/en/stable/overview/intro/#what-is-hubble)

Both Hubble and Retina are listed as emerging [eBPF Applications](https://ebpf.io/applications/)!

Hubble has historically been quite tightly coupled with Cilium. This led to challenges if you wanted to use another CNI, or perhaps go beyond Linux. Retina bridges this gap, and enables the use of a Hubble control plane on any CNI and across both Linux and Windows.

Check out our talk from KubeCon 2024 which goes into this topic even further - [Hubble Beyond Cilium - Anubhab Majumdar & Mathew Merrick, Microsoft](https://www.youtube.com/watch?v=cnNUfQKhYiM)

## Minimum System Requirements

The following are known system requirements for installing Retina:

- Minimum Linux Kernel Version: v5.4.0
Loading

0 comments on commit 0732e6a

Please sign in to comment.