From 3a06354c84e35da1093e49d7cc6720fde84e75a4 Mon Sep 17 00:00:00 2001 From: Will Gorman Date: Thu, 23 May 2024 08:23:02 -0500 Subject: [PATCH] Add gauge on all async tasks and max async task id (#15) --- pkg/prom/collector.go | 35 +++++++++++++++++---- pkg/prom/metrics.go | 16 +++++++++- pkg/solidfire/solidfire_test.go | 48 +++++++++++++++++------------ pkg/testutils/collector.go | 14 +++++++++ test/fixtures/ListAsyncResults.json | 14 ++++----- 5 files changed, 94 insertions(+), 33 deletions(-) diff --git a/pkg/prom/collector.go b/pkg/prom/collector.go index 5b190c2..0f89e3e 100644 --- a/pkg/prom/collector.go +++ b/pkg/prom/collector.go @@ -168,6 +168,8 @@ func (c *SolidfireCollector) Describe(ch chan<- *prometheus.Desc) { ch <- MetricDescriptions.VirtualVolumeTasks ch <- MetricDescriptions.BulkVolumeJobs ch <- MetricDescriptions.AsyncResultsActive + ch <- MetricDescriptions.AsyncResults + ch <- MetricDescriptions.MaxAsyncResultID } func (c *SolidfireCollector) collectVolumeMeta(ctx context.Context, ch chan<- prometheus.Metric) error { @@ -1296,24 +1298,32 @@ func (c *SolidfireCollector) collectAsyncResults(ctx context.Context, ch chan<- if err != nil { return err } - - m := make(map[string]int64) + maxAsyncResultID := 0 + activeAsyncResults := make(map[string]int64) + allAsyncResults := make(map[string]int64) for _, v := range ar.Result.AsyncHandles { + allAsyncResults[v.ResultType]++ if !v.Completed && !v.Success { - m[v.ResultType]++ + activeAsyncResults[v.ResultType]++ + } + if v.AsyncResultID > int64(maxAsyncResultID) { + maxAsyncResultID = int(v.AsyncResultID) } } types := []string{"DriveAdd", "BulkVolume", "Clone", "DriveRemoval", "RtfiPendingNode"} for _, t := range types { - if _, ok := m[t]; !ok { - m[t] = 0 + if _, ok := activeAsyncResults[t]; !ok { + activeAsyncResults[t] = 0 + } + if _, ok := allAsyncResults[t]; !ok { + allAsyncResults[t] = 0 } } mu.Lock() defer mu.Unlock() - for k, v := range m { + for k, v := range activeAsyncResults { ch <- prometheus.MustNewConstMetric( MetricDescriptions.AsyncResultsActive, prometheus.GaugeValue, @@ -1321,6 +1331,19 @@ func (c *SolidfireCollector) collectAsyncResults(ctx context.Context, ch chan<- k, ) } + for k, v := range allAsyncResults { + ch <- prometheus.MustNewConstMetric( + MetricDescriptions.AsyncResults, + prometheus.GaugeValue, + float64(v), + k, + ) + } + ch <- prometheus.MustNewConstMetric( + MetricDescriptions.MaxAsyncResultID, + prometheus.GaugeValue, + float64(maxAsyncResultID), + ) return nil } diff --git a/pkg/prom/metrics.go b/pkg/prom/metrics.go index cf392bb..10070f9 100644 --- a/pkg/prom/metrics.go +++ b/pkg/prom/metrics.go @@ -143,6 +143,8 @@ type Descriptions struct { VirtualVolumeTasks *prometheus.Desc BulkVolumeJobs *prometheus.Desc AsyncResultsActive *prometheus.Desc + AsyncResults *prometheus.Desc + MaxAsyncResultID *prometheus.Desc } func NewMetricDescriptions(namespace string) *Descriptions { @@ -920,10 +922,22 @@ func NewMetricDescriptions(namespace string) *Descriptions { ) d.AsyncResultsActive = prometheus.NewDesc( prometheus.BuildFQName(namespace, "", "cluster_volume_async_result_active"), - "The active jobs return by async results", + "The active jobs returned by async results", []string{"type"}, nil, ) + d.AsyncResults = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "", "cluster_volume_async_result"), + "All (active and completed) jobs returned by async results", + []string{"type"}, + nil, + ) + d.MaxAsyncResultID = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "", "cluster_max_async_result_id"), + "The maximum id used by async result handles", + nil, + nil, + ) return &d } diff --git a/pkg/solidfire/solidfire_test.go b/pkg/solidfire/solidfire_test.go index de1389a..87d8529 100644 --- a/pkg/solidfire/solidfire_test.go +++ b/pkg/solidfire/solidfire_test.go @@ -44,7 +44,7 @@ func TestClient_ListVolumeStats(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { defer gock.Off() - //gock.Observe(gock.DumpRequest) + // gock.Observe(gock.DumpRequest) gock.New(sfHost). Post(sfRPCEndpoint). MatchType("json"). @@ -54,7 +54,8 @@ func TestClient_ListVolumeStats(t *testing.T) { Params: solidfire.ListVolumeStatsRPCParams{ VolumeIDs: []int{}, IncludeVirtualVolumes: true, - }}). + }, + }). Reply(200). BodyString(string(fixture)) gotRaw, err := sfClient.ListVolumeStats(context.Background()) @@ -90,7 +91,7 @@ func TestClient_ListVolumes(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { defer gock.Off() - //gock.Observe(gock.DumpRequest) + // gock.Observe(gock.DumpRequest) gock.New(sfHost). Post(sfRPCEndpoint). MatchType("json"). @@ -99,7 +100,8 @@ func TestClient_ListVolumes(t *testing.T) { Method: solidfire.RPCListVolumes, Params: solidfire.ListVolumesRPCParams{ IncludeVirtualVolumes: true, - }}). + }, + }). Reply(200). BodyString(string(fixture)) gotRaw, err := sfClient.ListVolumes(context.Background()) @@ -114,6 +116,7 @@ func TestClient_ListVolumes(t *testing.T) { }) } } + func TestClient_GetClusterCapacity(t *testing.T) { fixture, err := ioutil.ReadFile(testutils.ResolveFixturePath(fixtureBasePath, solidfire.RPCGetClusterCapacity)) if err != nil { @@ -134,14 +137,15 @@ func TestClient_GetClusterCapacity(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { defer gock.Off() - //gock.Observe(gock.DumpRequest) + // gock.Observe(gock.DumpRequest) gock.New(sfHost). Post(sfRPCEndpoint). MatchType("json"). JSON(solidfire.RPCBody{ ID: 1, Method: solidfire.RPCGetClusterCapacity, - Params: solidfire.GetClusterCapacityRPCParams{}}). + Params: solidfire.GetClusterCapacityRPCParams{}, + }). Reply(200). BodyString(string(fixture)) gotRaw, err := sfClient.GetClusterCapacity(context.Background()) @@ -187,7 +191,8 @@ func TestClient_ListClusterFaults(t *testing.T) { Params: solidfire.ListClusterFaultsRPCParams{ FaultTypes: "current", BestPractices: true, - }}). + }, + }). Reply(200). BodyString(string(fixture)) @@ -232,7 +237,8 @@ func TestClient_ListNodeStats(t *testing.T) { JSON(solidfire.RPCBody{ ID: 1, Method: solidfire.RPCListNodeStats, - Params: solidfire.ListNodeStatsRPCParams{}}). + Params: solidfire.ListNodeStatsRPCParams{}, + }). Reply(200). BodyString(string(fixture)) @@ -279,7 +285,8 @@ func TestClient_ListVolumeQoSHistograms(t *testing.T) { Method: solidfire.RPCListVolumeQoSHistograms, Params: solidfire.ListVolumeQoSHistogramsRPCParams{ VolumeIDs: []int{}, // blank gives us all of them - }}). + }, + }). Reply(200). BodyString(string(fixture)) @@ -324,7 +331,8 @@ func TestClient_ListAllNodes(t *testing.T) { JSON(solidfire.RPCBody{ ID: 1, Method: solidfire.RPCListAllNodes, - Params: solidfire.ListAllNodesRPCParams{}}). + Params: solidfire.ListAllNodesRPCParams{}, + }). Reply(200). BodyString(string(fixture)) @@ -369,7 +377,8 @@ func TestClient_GetClusterStats(t *testing.T) { JSON(solidfire.RPCBody{ ID: 1, Method: solidfire.RPCGetClusterStats, - Params: solidfire.GetClusterStatsRPCParams{}}). + Params: solidfire.GetClusterStatsRPCParams{}, + }). Reply(200). BodyString(string(fixture)) @@ -414,7 +423,8 @@ func TestClient_GetClusterFullThreshold(t *testing.T) { JSON(solidfire.RPCBody{ ID: 1, Method: solidfire.RPCGetClusterFullThreshold, - Params: solidfire.GetClusterFullThresholdParams{}}). + Params: solidfire.GetClusterFullThresholdParams{}, + }). Reply(200). BodyString(string(fixture)) @@ -452,7 +462,7 @@ func TestClient_ListAccounts(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { defer gock.Off() - //gock.Observe(gock.DumpRequest) + // gock.Observe(gock.DumpRequest) gock.New(sfHost). Post(sfRPCEndpoint). MatchType("json"). @@ -496,7 +506,7 @@ func TestClient_ListInitiators(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { defer gock.Off() - //gock.Observe(gock.DumpRequest) + // gock.Observe(gock.DumpRequest) gock.New(sfHost). Post(sfRPCEndpoint). MatchType("json"). @@ -539,7 +549,7 @@ func TestClient_ListVirtualVolumeTasks(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { defer gock.Off() - //gock.Observe(gock.DumpRequest) + // gock.Observe(gock.DumpRequest) gock.New(sfHost). Post(sfRPCEndpoint). MatchType("json"). @@ -575,14 +585,14 @@ func TestClient_ListAsyncResults(t *testing.T) { wantErr bool }{ { - want: 47, + want: 43, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { defer gock.Off() - //gock.Observe(gock.DumpRequest) + // gock.Observe(gock.DumpRequest) gock.New(sfHost). Post(sfRPCEndpoint). MatchType("json"). @@ -625,7 +635,7 @@ func TestClient_ListBulkVolumeJobs(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { defer gock.Off() - //gock.Observe(gock.DumpRequest) + // gock.Observe(gock.DumpRequest) gock.New(sfHost). Post(sfRPCEndpoint). MatchType("json"). @@ -669,7 +679,7 @@ func TestClient_ListVolumeAccessGroups(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { defer gock.Off() - //gock.Observe(gock.DumpRequest) + // gock.Observe(gock.DumpRequest) gock.New(sfHost). Post(sfRPCEndpoint). MatchType("json"). diff --git a/pkg/testutils/collector.go b/pkg/testutils/collector.go index 43c946e..e7a4858 100644 --- a/pkg/testutils/collector.go +++ b/pkg/testutils/collector.go @@ -31,6 +31,7 @@ solidfire_cluster_last_sample_read_ops 0 solidfire_cluster_last_sample_write_bytes 0 solidfire_cluster_last_sample_write_ops 0 solidfire_cluster_latency_seconds 0 +solidfire_cluster_max_async_result_id 47 solidfire_cluster_max_iops 3000 solidfire_cluster_max_metadata_over_provision_factor 5 solidfire_cluster_max_over_provisionable_space_bytes 1.855425871872e+13 @@ -85,6 +86,12 @@ solidfire_cluster_volume_async_result_active{type="DriveAdd"} 0 solidfire_cluster_volume_async_result_active{type="DriveRemoval"} 0 solidfire_cluster_volume_async_result_active{type="NotClone"} 1 solidfire_cluster_volume_async_result_active{type="RtfiPendingNode"} 0 +solidfire_cluster_volume_async_result{type="BulkVolume"} 0 +solidfire_cluster_volume_async_result{type="Clone"} 5 +solidfire_cluster_volume_async_result{type="DriveAdd"} 0 +solidfire_cluster_volume_async_result{type="DriveRemoval"} 0 +solidfire_cluster_volume_async_result{type="NotClone"} 2 +solidfire_cluster_volume_async_result{type="RtfiPendingNode"} 0 solidfire_cluster_volume_bulk_volume_job_count 1 solidfire_cluster_write_bytes_total 1.21720639488e+11 solidfire_cluster_write_latency_seconds 0 @@ -315,6 +322,7 @@ solidfire_cluster_last_sample_read_ops 0 solidfire_cluster_last_sample_write_bytes 0 solidfire_cluster_last_sample_write_ops 0 solidfire_cluster_latency_seconds 0 +solidfire_cluster_max_async_result_id 47 solidfire_cluster_max_iops 3000 solidfire_cluster_max_metadata_over_provision_factor 5 solidfire_cluster_max_over_provisionable_space_bytes 1.855425871872e+13 @@ -369,6 +377,12 @@ solidfire_cluster_volume_async_result_active{type="DriveAdd"} 0 solidfire_cluster_volume_async_result_active{type="DriveRemoval"} 0 solidfire_cluster_volume_async_result_active{type="NotClone"} 1 solidfire_cluster_volume_async_result_active{type="RtfiPendingNode"} 0 +solidfire_cluster_volume_async_result{type="BulkVolume"} 0 +solidfire_cluster_volume_async_result{type="Clone"} 5 +solidfire_cluster_volume_async_result{type="DriveAdd"} 0 +solidfire_cluster_volume_async_result{type="DriveRemoval"} 0 +solidfire_cluster_volume_async_result{type="NotClone"} 2 +solidfire_cluster_volume_async_result{type="RtfiPendingNode"} 0 solidfire_cluster_volume_bulk_volume_job_count 1 solidfire_cluster_write_bytes_total 1.21720639488e+11 solidfire_cluster_write_latency_seconds 0 diff --git a/test/fixtures/ListAsyncResults.json b/test/fixtures/ListAsyncResults.json index 7874c4b..fbcfb7a 100644 --- a/test/fixtures/ListAsyncResults.json +++ b/test/fixtures/ListAsyncResults.json @@ -3,7 +3,7 @@ "result": { "asyncHandles": [ { - "asyncResultID": 47, + "asyncResultID": 43, "completed": true, "createTime": "2016-01-01T22:29:19Z", "data": { @@ -16,7 +16,7 @@ "success": true }, { - "asyncResultID": 47, + "asyncResultID": 46, "completed": true, "createTime": "2016-01-01T22:29:19Z", "data": { @@ -29,7 +29,7 @@ "success": true }, { - "asyncResultID": 47, + "asyncResultID": 45, "completed": false, "createTime": "2016-01-01T22:29:19Z", "data": { @@ -55,7 +55,7 @@ "success": true }, { - "asyncResultID": 47, + "asyncResultID": 40, "completed": true, "createTime": "2016-01-01T22:29:19Z", "data": { @@ -68,7 +68,7 @@ "success": false }, { - "asyncResultID": 47, + "asyncResultID": 42, "completed": true, "createTime": "2016-01-01T22:29:19Z", "data": { @@ -81,7 +81,7 @@ "success": true }, { - "asyncResultID": 47, + "asyncResultID": 39, "completed": false, "createTime": "2016-01-01T22:29:19Z", "data": { @@ -95,4 +95,4 @@ } ] } -} \ No newline at end of file +}