Skip to content

Commit

Permalink
observability: add prometheus variable labels; remove collector
Browse files Browse the repository at this point in the history
* when re-initializing backends (new-aws() and friends):
  - register the corresponding metrics only once
* part two, prev. commit: 118a821

Signed-off-by: Alex Aizman <[email protected]>
  • Loading branch information
alex-aizman committed Dec 24, 2024
1 parent 118a821 commit 38bc48a
Show file tree
Hide file tree
Showing 11 changed files with 46 additions and 34 deletions.
12 changes: 8 additions & 4 deletions ais/backend/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,17 +66,21 @@ var _ core.Backend = (*s3bp)(nil)

// environment variables => static defaults that can still be overridden via bck.Props.Extra.AWS
// in addition to these two (below), default bucket region = env.AwsDefaultRegion()
func NewAWS(t core.TargetPut, tstats stats.Tracker) (core.Backend, error) {
func NewAWS(t core.TargetPut, tstats stats.Tracker, startingUp bool) (core.Backend, error) {
s3Endpoint = os.Getenv(env.AWS.Endpoint)
awsProfile = os.Getenv(env.AWS.Profile)
bp := &s3bp{
t: t,
mm: t.PageMM(),
base: base{provider: apc.AWS},
}
bp.base.init(t.Snode(), tstats)
// reset clients map to recreate and reload credentials
clients.Clear()
if startingUp {
// register metrics only once
bp.base.init(t.Snode(), tstats)
} else {
// reset clients map to recreate and reload credentials
clients.Clear()
}
return bp, nil
}

Expand Down
7 changes: 5 additions & 2 deletions ais/backend/azure.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ func asEndpoint() string {
}
}

func NewAzure(t core.TargetPut, tstats stats.Tracker) (core.Backend, error) {
func NewAzure(t core.TargetPut, tstats stats.Tracker, startingUp bool) (core.Backend, error) {
blurl := asEndpoint()

// NOTE: NewSharedKeyCredential requires account name and its primary or secondary key
Expand All @@ -112,7 +112,10 @@ func NewAzure(t core.TargetPut, tstats stats.Tracker) (core.Backend, error) {
u: blurl,
base: base{provider: apc.Azure},
}
bp.base.init(t.Snode(), tstats)
if startingUp {
// register metrics only once
bp.base.init(t.Snode(), tstats)
}
return bp, nil
}

Expand Down
8 changes: 5 additions & 3 deletions ais/backend/gcp.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ var (
_ core.Backend = (*gsbp)(nil)
)

func NewGCP(t core.TargetPut, tstats stats.Tracker) (_ core.Backend, err error) {
func NewGCP(t core.TargetPut, tstats stats.Tracker, startingUp bool) (_ core.Backend, err error) {
var (
projectID string
credProjectID = readCredFile()
Expand All @@ -89,8 +89,10 @@ func NewGCP(t core.TargetPut, tstats stats.Tracker) (_ core.Backend, err error)
projectID: projectID,
base: base{provider: apc.GCP},
}
bp.base.init(t.Snode(), tstats)

if startingUp {
// register metrics only once
bp.base.init(t.Snode(), tstats)
}
gctx = context.Background()
gcpClient, err = bp.createClient(gctx)

Expand Down
2 changes: 1 addition & 1 deletion ais/backend/mock_aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import (
"github.com/NVIDIA/aistore/stats"
)

func NewAWS(_ core.TargetPut, _ stats.Tracker) (core.Backend, error) {
// NewAWS is the mock (stub) AWS backend constructor: all three arguments are
// ignored and the call always fails with an ErrInitBackend naming the AWS provider.
// NOTE(review): presumably compiled in when the real AWS backend is not selected
// via build tags - confirm against this file's build constraints.
func NewAWS(core.TargetPut, stats.Tracker, bool) (core.Backend, error) {
	initErr := &cmn.ErrInitBackend{Provider: apc.AWS}
	return nil, initErr
}

Expand Down
2 changes: 1 addition & 1 deletion ais/backend/mock_azure.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@ import (
"github.com/NVIDIA/aistore/stats"
)

func NewAzure(_ core.TargetPut, _ stats.Tracker) (core.Backend, error) {
// NewAzure is the mock (stub) Azure backend constructor: all three arguments are
// ignored and the call always fails with an ErrInitBackend naming the Azure provider.
// NOTE(review): presumably compiled in when the real Azure backend is not selected
// via build tags - confirm against this file's build constraints.
func NewAzure(core.TargetPut, stats.Tracker, bool) (core.Backend, error) {
	initErr := &cmn.ErrInitBackend{Provider: apc.Azure}
	return nil, initErr
}
2 changes: 1 addition & 1 deletion ais/backend/mock_gcp.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@ import (
"github.com/NVIDIA/aistore/stats"
)

func NewGCP(_ core.TargetPut, _ stats.Tracker) (core.Backend, error) {
// NewGCP is the mock (stub) GCP backend constructor: all three arguments are
// ignored and the call always fails with an ErrInitBackend naming the GCP provider.
// NOTE(review): presumably compiled in when the real GCP backend is not selected
// via build tags - confirm against this file's build constraints.
func NewGCP(core.TargetPut, stats.Tracker, bool) (core.Backend, error) {
	initErr := &cmn.ErrInitBackend{Provider: apc.GCP}
	return nil, initErr
}
2 changes: 1 addition & 1 deletion ais/backend/mock_oci.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@ import (
"github.com/NVIDIA/aistore/stats"
)

func NewOCI(core.TargetPut, stats.Tracker) (core.Backend, error) {
// NewOCI is the mock (stub) OCI backend constructor: all three arguments are
// ignored and the call always fails with an ErrInitBackend.
//
// The error names the OCI provider. It previously carried apc.GCP (a copy-paste
// slip), which made a failed OCI initialization read as a GCP failure.
// NOTE(review): presumably compiled in when the real OCI backend is not selected
// via build tags - confirm against this file's build constraints.
func NewOCI(core.TargetPut, stats.Tracker, bool) (core.Backend, error) {
	return nil, &cmn.ErrInitBackend{Provider: apc.OCI}
}
7 changes: 5 additions & 2 deletions ais/backend/oci.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ type ocibp struct {
base
}

func NewOCI(t core.TargetPut, tstats stats.Tracker) (core.Backend, error) {
func NewOCI(t core.TargetPut, tstats stats.Tracker, startingUp bool) (core.Backend, error) {
bp := &ocibp{
t: t,
base: base{provider: apc.AWS},
Expand Down Expand Up @@ -128,7 +128,10 @@ func NewOCI(t core.TargetPut, tstats stats.Tracker) (core.Backend, error) {
}
bp.namespace = *resp.Value

bp.base.init(t.Snode(), tstats)
if startingUp {
// register metrics only once
bp.base.init(t.Snode(), tstats)
}

return bp, nil
}
Expand Down
12 changes: 6 additions & 6 deletions ais/target.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,14 +114,14 @@ func (t *target) initBackends(tstats *stats.Trunner) {
}
}

if err := t.initBuiltTagged(tstats, config); err != nil {
if err := t.initBuiltTagged(tstats, config, true /*starting up*/); err != nil {
cos.ExitLog(err)
}
}

// - remote (e.g. cloud) backends w/ empty stubs unless populated via build tags
// - enabled/disabled via config.Backend
func (t *target) initBuiltTagged(tstats *stats.Trunner, config *cmn.Config) error {
func (t *target) initBuiltTagged(tstats *stats.Trunner, config *cmn.Config, startingUp bool) error {
var enabled, disabled, notlinked []string

for provider := range apc.Providers {
Expand All @@ -131,13 +131,13 @@ func (t *target) initBuiltTagged(tstats *stats.Trunner, config *cmn.Config) erro
)
switch provider {
case apc.AWS:
add, err = backend.NewAWS(t, tstats)
add, err = backend.NewAWS(t, tstats, startingUp)
case apc.GCP:
add, err = backend.NewGCP(t, tstats)
add, err = backend.NewGCP(t, tstats, startingUp)
case apc.Azure:
add, err = backend.NewAzure(t, tstats)
add, err = backend.NewAzure(t, tstats, startingUp)
case apc.OCI:
add, err = backend.NewOCI(t, tstats)
add, err = backend.NewOCI(t, tstats, startingUp)
case apc.HT:
add, err = backend.NewHT(t, config, tstats)
case apc.AIS:
Expand Down
8 changes: 4 additions & 4 deletions ais/test/object_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -922,13 +922,13 @@ func TestValidateOnWarmGetRemoteBucket(t *testing.T) {
var mockBackend core.Backend
switch m.bck.Provider {
case apc.AWS:
mockBackend, _ = backend.NewAWS(tMock, mock.NewStatsTracker())
mockBackend, _ = backend.NewAWS(tMock, mock.NewStatsTracker(), false /*starting up*/)
case apc.GCP:
mockBackend, _ = backend.NewGCP(tMock, mock.NewStatsTracker())
mockBackend, _ = backend.NewGCP(tMock, mock.NewStatsTracker(), false /*starting up*/)
case apc.Azure:
mockBackend, _ = backend.NewAzure(tMock, mock.NewStatsTracker())
mockBackend, _ = backend.NewAzure(tMock, mock.NewStatsTracker(), false /*starting up*/)
case apc.OCI:
mockBackend, _ = backend.NewOCI(tMock, mock.NewStatsTracker())
mockBackend, _ = backend.NewOCI(tMock, mock.NewStatsTracker(), false /*starting up*/)
default:
t.Fatalf("unexpected backend provider %q", m.bck.Provider)
}
Expand Down
18 changes: 9 additions & 9 deletions ais/tgtcp.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ func (t *target) daeputMsg(w http.ResponseWriter, r *http.Request) {
provider = msg.Name
)
if provider == "" { // all
if err := t.initBuiltTagged(t.statsT.(*stats.Trunner), cmn.GCO.Get()); err != nil {
if err := t.initBuiltTagged(t.statsT.(*stats.Trunner), cmn.GCO.Get(), false); err != nil {
t.writeErr(w, r, err)
}
return
Expand All @@ -198,13 +198,13 @@ func (t *target) daeputMsg(w http.ResponseWriter, r *http.Request) {
var add core.Backend
switch provider {
case apc.AWS:
add, err = backend.NewAWS(t, tstats)
add, err = backend.NewAWS(t, tstats, false /*starting up*/)
case apc.GCP:
add, err = backend.NewGCP(t, tstats)
add, err = backend.NewGCP(t, tstats, false)
case apc.Azure:
add, err = backend.NewAzure(t, tstats)
add, err = backend.NewAzure(t, tstats, false)
case apc.OCI:
add, err = backend.NewOCI(t, tstats)
add, err = backend.NewOCI(t, tstats, false)
}
if err != nil {
t.writeErr(w, r, err)
Expand Down Expand Up @@ -301,13 +301,13 @@ func (t *target) enableBackend(w http.ResponseWriter, r *http.Request, items []s
var err error
switch provider {
case apc.AWS:
bp, err = backend.NewAWS(t, t.statsT)
bp, err = backend.NewAWS(t, t.statsT, false /*starting up*/)
case apc.GCP:
bp, err = backend.NewGCP(t, t.statsT)
bp, err = backend.NewGCP(t, t.statsT, false /*starting up*/)
case apc.Azure:
bp, err = backend.NewAzure(t, t.statsT)
bp, err = backend.NewAzure(t, t.statsT, false /*starting up*/)
case apc.OCI:
bp, err = backend.NewOCI(t, t.statsT)
bp, err = backend.NewOCI(t, t.statsT, false /*starting up*/)
}
if err != nil {
debug.AssertNoErr(err) // (unlikely)
Expand Down

0 comments on commit 38bc48a

Please sign in to comment.