Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: Enable pprof server on demand (#16594) #17068

Closed
wants to merge 12 commits into from
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/argoproj/argo-cd/v2/controller/sharding"
"github.com/argoproj/argo-cd/v2/pkg/apis/application/v1alpha1"
appclientset "github.com/argoproj/argo-cd/v2/pkg/client/clientset/versioned"
"github.com/argoproj/argo-cd/v2/pkg/pprof"
"github.com/argoproj/argo-cd/v2/pkg/ratelimiter"
"github.com/argoproj/argo-cd/v2/reposerver/apiclient"
"github.com/argoproj/argo-cd/v2/util/argo/normalizers"
Expand Down Expand Up @@ -176,6 +177,19 @@ func NewCommand() *cobra.Command {
errors.CheckError(err)
cacheutil.CollectMetrics(redisClient, appController.GetMetricsServer())

// run pprof server
if pprof.IsEnabled() {
pprofSrv, err := pprof.NewPprofServer()
if err != nil {
log.Fatal(err, "failed to create pprof handler")
}
go func() {
if err := pprofSrv.Start(ctx); err != nil {
log.Fatal(err, "unable to start pprof handler")
}
}()
}

stats.RegisterStackDumper()
stats.StartStatsTicker(10 * time.Minute)
stats.RegisterHeapDumper("memprofile")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import (
"github.com/argoproj/argo-cd/v2/applicationset/services"
appv1alpha1 "github.com/argoproj/argo-cd/v2/pkg/apis/application/v1alpha1"
appclientset "github.com/argoproj/argo-cd/v2/pkg/client/clientset/versioned"
"github.com/argoproj/argo-cd/v2/pkg/pprof"
"github.com/argoproj/argo-cd/v2/util/cli"
"github.com/argoproj/argo-cd/v2/util/db"
"github.com/argoproj/argo-cd/v2/util/errors"
Expand Down Expand Up @@ -217,6 +218,17 @@ func NewCommand() *cobra.Command {
os.Exit(1)
}

// run pprof server
if pprof.IsEnabled() {
pprofSrv, err := pprof.NewPprofServer()
if err != nil {
log.Fatal(err, "failed to create pprof handler")
}
if err := mgr.Add(pprofSrv); err != nil {
log.Fatal(err, "unable to set up pprof handler")
}
}

stats.StartStatsTicker(10 * time.Minute)
log.Info("Starting manager")
if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
Expand Down
15 changes: 15 additions & 0 deletions cmd/argocd-cmp-server/commands/argocd_cmp_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/argoproj/argo-cd/v2/cmpserver"
"github.com/argoproj/argo-cd/v2/cmpserver/plugin"
"github.com/argoproj/argo-cd/v2/common"
"github.com/argoproj/argo-cd/v2/pkg/pprof"
"github.com/argoproj/argo-cd/v2/util/cli"
"github.com/argoproj/argo-cd/v2/util/env"
"github.com/argoproj/argo-cd/v2/util/errors"
Expand Down Expand Up @@ -70,13 +71,27 @@ func NewCommand() *cobra.Command {
})
errors.CheckError(err)

// run pprof server
if pprof.IsEnabled() {
pprofSrv, err := pprof.NewPprofServer()
if err != nil {
log.Fatal(err, "failed to create pprof handler")
}
go func() {
if err := pprofSrv.Start(ctx); err != nil {
log.Fatal(err, "unable to start pprof handler")
}
}()
}

// register dumper
stats.RegisterStackDumper()
stats.StartStatsTicker(10 * time.Minute)
stats.RegisterHeapDumper("memprofile")

// run argocd-cmp-server server
server.Run()

return nil
},
}
Expand Down
15 changes: 15 additions & 0 deletions cmd/argocd-notification/commands/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"strings"

"github.com/argoproj/argo-cd/v2/common"
"github.com/argoproj/argo-cd/v2/pkg/pprof"
"github.com/argoproj/argo-cd/v2/reposerver/apiclient"

"github.com/argoproj/argo-cd/v2/util/env"
Expand Down Expand Up @@ -147,6 +148,20 @@ func NewCommand() *cobra.Command {
}

go ctrl.Run(ctx, processorsCount)

// run pprof server
if pprof.IsEnabled() {
pprofSrv, err := pprof.NewPprofServer()
if err != nil {
log.Fatal(err, "failed to create pprof handler")
}
go func() {
if err := pprofSrv.Start(ctx); err != nil {
log.Fatal(err, "unable to start pprof handler")
}
}()
}

<-ctx.Done()
return nil
},
Expand Down
14 changes: 14 additions & 0 deletions cmd/argocd-repo-server/commands/argocd_repo_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (

cmdutil "github.com/argoproj/argo-cd/v2/cmd/util"
"github.com/argoproj/argo-cd/v2/common"
"github.com/argoproj/argo-cd/v2/pkg/pprof"
"github.com/argoproj/argo-cd/v2/reposerver"
"github.com/argoproj/argo-cd/v2/reposerver/apiclient"
"github.com/argoproj/argo-cd/v2/reposerver/askpass"
Expand Down Expand Up @@ -189,6 +190,19 @@ func NewCommand() *cobra.Command {
}

log.Infof("argocd-repo-server is listening on %s", listener.Addr())

// run pprof server
if pprof.IsEnabled() {
pprofSrv, err := pprof.NewPprofServer()
if err != nil {
log.Fatal(err, "failed to create pprof handler")
}
go func() {
if err := pprofSrv.Start(ctx); err != nil {
log.Fatal(err, "unable to start pprof handler")
}
}()
}
stats.RegisterStackDumper()
stats.StartStatsTicker(10 * time.Minute)
stats.RegisterHeapDumper("memprofile")
Expand Down
14 changes: 14 additions & 0 deletions cmd/argocd-server/commands/argocd_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/argoproj/argo-cd/v2/common"
"github.com/argoproj/argo-cd/v2/pkg/apis/application/v1alpha1"
appclientset "github.com/argoproj/argo-cd/v2/pkg/client/clientset/versioned"
"github.com/argoproj/argo-cd/v2/pkg/pprof"
"github.com/argoproj/argo-cd/v2/reposerver/apiclient"
reposervercache "github.com/argoproj/argo-cd/v2/reposerver/cache"
"github.com/argoproj/argo-cd/v2/server"
Expand Down Expand Up @@ -233,6 +234,19 @@ func NewCommand() *cobra.Command {
EnableScmProviders: enableScmProviders,
}

// run pprof server
if pprof.IsEnabled() {
pprofSrv, err := pprof.NewPprofServer()
if err != nil {
log.Fatal(err, "failed to create pprof handler")
}
go func() {
if err := pprofSrv.Start(ctx); err != nil {
log.Fatal(err, "unable to start pprof handler")
}
}()
}

stats.RegisterStackDumper()
stats.StartStatsTicker(10 * time.Minute)
stats.RegisterHeapDumper("memprofile")
Expand Down
2 changes: 0 additions & 2 deletions controller/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ import (
applister "github.com/argoproj/argo-cd/v2/pkg/client/listers/application/v1alpha1"
"github.com/argoproj/argo-cd/v2/util/git"
"github.com/argoproj/argo-cd/v2/util/healthz"
"github.com/argoproj/argo-cd/v2/util/profile"

ctrl_metrics "sigs.k8s.io/controller-runtime/pkg/metrics"
)
Expand Down Expand Up @@ -169,7 +168,6 @@ func NewMetricsServer(addr string, appLister applister.ApplicationLister, appFil
// contains workqueue metrics, process and golang metrics
ctrl_metrics.Registry,
}, promhttp.HandlerOpts{}))
profile.RegisterProfiler(mux)
healthz.ServeHealthCheck(mux, healthCheck)

registry.MustRegister(syncCounter)
Expand Down
39 changes: 39 additions & 0 deletions docs/operator-manual/profiling.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Profiling with `pprof`

[`pprof`](https://go.dev/blog/pprof) is a powerful tool for profiling Go applications. It is part of the Go standard library and provides rich insights into the performance characteristics of Go programs, such as CPU usage, memory allocation, and contention.

## Basic Usage

Enable profiling endpoints by setting the environment variable `ARGO_PPROF` with your preferred port. For instance, `ARGO_PPROF=8888` will start profiling endpoints on port `8888`.

`pprof` has two modes: interactive and non-interactive. Non-interactive mode generates profiling data for future analysis. Interactive mode launches a web server to visualize the profiling data in real-time.

!!! Note "Port Forward"
[Port forwarding](https://kubernetes.io/docs/tasks/access-application-cluster/port-forward-access-application-cluster/) is a more secure way to access debug-level information than exposing these endpoints via an Ingress.
The examples below assume you have forwarded an Argo component's profiling port to `http://localhost:6060`, but you can replace that with your preferred local port.

### Generate CPU Profile

Generate a CPU profile with the following command:

```bash
go tool pprof http://localhost:6060/debug/pprof/profile
```

### Generate Heap Profiles

Generate a heap profile with the following command:

```bash
go tool pprof http://localhost:6060/debug/pprof/heap
```

### Interactive Mode

Use interactive mode with the following command:

```bash
go tool pprof -http=:8080 http://localhost:6060/debug/pprof/profile
```

This starts a web server and opens a browser window displaying the profiling data.
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,13 @@ argocd-application-controller [flags]
--password string Password for basic authentication to the API server
--persist-resource-health Enables storing the managed resources health in the Application CRD (default true)
--proxy-url string If provided, this URL will be used to connect via proxy
--redis string Redis server hostname and port (e.g. argocd-redis:6379).
--redis string Redis server hostname and port (e.g. argocd-redis:6379).
--redis-ca-certificate string Path to Redis server CA certificate (e.g. /etc/certs/redis/ca.crt). If not specified, system trusted CAs will be used for server certificate validation.
--redis-client-certificate string Path to Redis client certificate (e.g. /etc/certs/redis/client.crt).
--redis-client-key string Path to Redis client key (e.g. /etc/certs/redis/client.key).
--redis-compress string Enable compression for data sent to Redis with the required compression algorithm. (possible values: gzip, none) (default "gzip")
--redis-insecure-skip-tls-verify Skip Redis server certificate validation.
--redis-use-tls Use TLS when connecting to Redis.
--redis-use-tls Use TLS when connecting to Redis.
--redisdb int Redis database.
--repo-error-grace-period-seconds int Grace period in seconds for ignoring consecutive errors while communicating with repo server. (default 180)
--repo-server string Repo server address. (default "argocd-repo-server:8081")
Expand All @@ -66,7 +66,7 @@ argocd-application-controller [flags]
--repo-server-timeout-seconds int Repo server RPC call timeout seconds. (default 60)
--request-timeout string The length of time to wait before giving up on a single server request. Non-zero values should contain a corresponding time unit (e.g. 1s, 2m, 3h). A value of zero means don't timeout requests. (default "0")
--self-heal-timeout-seconds int Specifies timeout between application self heal attempts (default 5)
--sentinel stringArray Redis sentinel hostname and port (e.g. argocd-redis-ha-announce-0:6379).
--sentinel stringArray Redis sentinel hostname and port (e.g. argocd-redis-ha-announce-0:6379).
--sentinelmaster string Redis sentinel master group name. (default "master")
--server string The address and port of the Kubernetes API server
--server-side-diff-enabled Feature flag to enable ServerSide diff. Default ("false")
Expand Down
1 change: 1 addition & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ nav:
- operator-manual/custom-styles.md
- operator-manual/ui-customization.md
- operator-manual/metrics.md
- operator-manual/profiling.md
- operator-manual/web_based_terminal.md
- operator-manual/config-management-plugins.md
- operator-manual/deep_links.md
Expand Down
83 changes: 83 additions & 0 deletions pkg/pprof/server.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
package pprof

import (
"context"
"errors"
"fmt"
"net"
"net/http"
"net/http/pprof"
"os"
"time"

log "github.com/sirupsen/logrus"
)

// listener is the TCP listener the pprof server serves on. init assigns it
// when the ARGO_PPROF environment variable is set; it is nil otherwise, and
// IsEnabled, NewPprofServer and Start all treat nil as "profiling disabled".
var (
listener net.Listener
)

func init() {
addr, exist := os.LookupEnv("ARGO_PPROF")
if !exist {
return
}
listener, _ = net.Listen("tcp", fmt.Sprintf(":%s", addr))
}

// IsEnabled reports whether the package holds a pprof listener, i.e. whether
// NewPprofServer and Start will succeed rather than return the "disabled"
// error.
func IsEnabled() bool {
	enabled := listener != nil
	return enabled
}

// pprofServer wraps the http.Server that exposes the pprof handlers. It is
// built by NewPprofServer and run with Start.
type pprofServer struct {
// server is the HTTP server whose mux carries the /debug/pprof/ routes.
server *http.Server
}

func NewPprofServer() (*pprofServer, error) {
if listener == nil {
return nil, fmt.Errorf("pprof server is disabled")
}
mux := http.NewServeMux()
srv := &http.Server{
Handler: mux,
MaxHeaderBytes: 1 << 20,
IdleTimeout: 90 * time.Second, // matches http.DefaultTransport keep-alive timeout
ReadHeaderTimeout: 32 * time.Second,
}

mux.HandleFunc("/debug/pprof/", pprof.Index)
mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
mux.HandleFunc("/debug/pprof/profile", pprof.Profile)
mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
mux.HandleFunc("/debug/pprof/trace", pprof.Trace)
Comment on lines +48 to +52
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

might want to remove this other code that added handlers previously: #7533

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lol, I didn't realize that the code was there 🤦
but, does ArgoCD already have support for enabling the profiler using vars?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean, maybe I've added a feature that was already supported

Copy link

@agilgur5 agilgur5 Feb 8, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea I was just responding to someone on Slack about this and found the old PR and your PR and issue. Complete coincidence that I stumbled upon this, especially as I primarily contribute to Workflows.

but, does ArgoCD already have support for enabling the profiler using vars?

it seems to be unconditionally enabled on the metrics port(?) if I'm reading correctly. that would be a security issue if so (albeit a minor one, as it's just debug info of an OSS project), and a perf issue too.

also that means the port isn't customizable

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, so we will replace that with my current approach, right? I mean, I have to remove the old code and keep only the code from my PR.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the title in particular is used in the changelog and the prefix is typically used for cherry-picking (although CD has a bit different process for that). This might be a fix! since it's technically breaking?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is the ! required too in the title?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In conventional commits, the ! is an indication of a breaking change. This does remove the previously default enabled access to the pprof endpoints and so is technically breaking.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the previous support for pprof actually only added it to the Application Controller and Server (some of the components may not have existed at the time; I think only support for the Repo Server was missing at the time of #7533), so this PR also adds it to the rest... which is technically a feature... 😖

So maybe this makes sense as a feat!: that is still backported due to the security issue? I'll let the CD Maintainers decide how to handle that 😅

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No problem , feat!:! xD

return &pprofServer{
server: srv,
}, nil
}

// Start serves the pprof endpoints on the package listener and blocks until
// the server stops.
//
// When ctx is cancelled the server is shut down gracefully, and Start returns
// nil once that shutdown has completed. If serving fails for any reason other
// than the server having been closed, that error is returned immediately.
// Start returns an error without serving when profiling is disabled (the
// package listener is nil).
func (s *pprofServer) Start(ctx context.Context) error {
	if listener == nil {
		return fmt.Errorf("pprof server is disabled")
	}

	shutdownDone := make(chan struct{})
	go func() {
		// Signal completion on every exit path of this goroutine.
		defer close(shutdownDone)

		<-ctx.Done()
		log.Info("shutting down server")
		if err := s.server.Shutdown(context.Background()); err != nil {
			// logrus formats Error's arguments like fmt.Sprint, so passing
			// (err, msg) would run the error text and the message together.
			// Attach the error as a structured field instead.
			log.WithError(err).Error("error shutting down server")
		}
	}()

	log.Info("Starting pprof server")
	if err := s.server.Serve(listener); err != nil && !errors.Is(err, http.ErrServerClosed) {
		return err
	}

	// Serve returns ErrServerClosed as soon as Shutdown is called; wait for
	// Shutdown itself to finish before reporting a clean stop.
	<-shutdownDone
	return nil
}

// NeedLeaderElection always returns false.
// NOTE(review): presumably this satisfies controller-runtime's
// LeaderElectionRunnable so that a manager runs the pprof server on every
// replica rather than only on the elected leader — confirm against the
// manager this server is added to.
func (s *pprofServer) NeedLeaderElection() bool {
return false
}
3 changes: 0 additions & 3 deletions server/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
"github.com/prometheus/client_golang/prometheus/promhttp"

"github.com/argoproj/argo-cd/v2/common"
"github.com/argoproj/argo-cd/v2/util/profile"
)

type MetricsServer struct {
Expand Down Expand Up @@ -72,8 +71,6 @@ func NewMetricsServer(host string, port int) *MetricsServer {
}, promhttp.HandlerOpts{}))
argoVersion.WithLabelValues(common.GetVersion().Version).Set(1)

profile.RegisterProfiler(mux)

registry.MustRegister(redisRequestCounter)
registry.MustRegister(redisRequestHistogram)
registry.MustRegister(extensionRequestCounter)
Expand Down
15 changes: 0 additions & 15 deletions util/profile/profile.go

This file was deleted.

Loading