Skip to content
This repository has been archived by the owner on Aug 2, 2022. It is now read-only.

Commit

Permalink
Adding Operational Metrics for Cache RCAs (#481)
Browse files Browse the repository at this point in the history
* Adding Operational Metrics for Cache
  • Loading branch information
khushbr authored Oct 19, 2020
1 parent 4c78b48 commit 378e0cb
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,6 @@ private <R extends BaseClusterRca> List<Action> getActionsFromRca(
}

/**
 * Populates {@code modifyCacheActionPriorityList} with the cache resources this class handles,
 * appending {@code SHARD_REQUEST_CACHE} first and {@code FIELD_DATA_CACHE} second.
 */
private void configureModifyCacheActionPriority() {
// Assigning shard request cache higher priority over field data cache
// NOTE(review): presumably list order encodes priority (earlier entries first) - confirm against
// the code that consumes this list.
// TODO: Modify as per the performance test results
this.modifyCacheActionPriorityList.add(ResourceEnum.SHARD_REQUEST_CACHE);
this.modifyCacheActionPriorityList.add(ResourceEnum.FIELD_DATA_CACHE);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,13 @@ public enum RcaVerticesMetrics implements MeasurementSet {
NUM_HIGH_HEAP_CLUSTER_RCA_TRIGGERED(
"HighHeapClusterRcaCount", "count", Collections.singletonList(Statistics.COUNT)),
YOUNG_GEN_RCA_NAMED_COUNT(
"YoungGenRcaNamedCount", "namedCount", Collections.singletonList(Statistics.NAMED_COUNTERS));
"YoungGenRcaNamedCount", "namedCount", Collections.singletonList(Statistics.NAMED_COUNTERS)),
NUM_FIELD_DATA_CACHE_RCA_TRIGGERED(
"FieldDataCacheRcaCount", "count", Collections.singletonList(Statistics.COUNT)),
NUM_SHARD_REQUEST_CACHE_RCA_TRIGGERED(
"ShardRequestCacheCount", "count", Collections.singletonList(Statistics.COUNT)),
CLUSTER_RCA_NAMED_COUNT(
"ClusterRcaNamedCount", "namedCount", Collections.singletonList(Statistics.NAMED_COUNTERS));

/** What we want to appear as the metric name. */
private String name;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import static com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.store.rca.cache.CacheUtil.getCacheMaxSize;
import static com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.store.rca.cache.CacheUtil.isSizeThresholdExceeded;

import com.amazon.opendistro.elasticsearch.performanceanalyzer.PerformanceAnalyzerApp;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.grpc.FlowUnitMessage;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.grpc.Resource;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.metricsdb.MetricsDB;
Expand All @@ -33,6 +34,7 @@
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.summaries.HotResourceSummary;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.summaries.ResourceUtil;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.core.RcaConf;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.metrics.RcaVerticesMetrics;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.util.InstanceDetails;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.scheduler.FlowUnitOperationArgWrapper;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.store.rca.cluster.NodeKey;
Expand Down Expand Up @@ -121,6 +123,8 @@ public ResourceFlowUnit<HotNodeSummary> operate() {
if (cacheEvictionCollector.isUnhealthy(currTimestamp) && exceedsSizeThreshold) {
context = new ResourceContext(Resources.State.UNHEALTHY);
nodeSummary.appendNestedSummary(cacheEvictionCollector.generateSummary(currTimestamp));
PerformanceAnalyzerApp.RCA_VERTICES_METRICS_AGGREGATOR.updateStat(
RcaVerticesMetrics.NUM_FIELD_DATA_CACHE_RCA_TRIGGERED, instanceDetails.getInstanceId().toString(), 1);
} else {
context = new ResourceContext(Resources.State.HEALTHY);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import static com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.store.rca.cache.CacheUtil.getCacheMaxSize;
import static com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.store.rca.cache.CacheUtil.isSizeThresholdExceeded;

import com.amazon.opendistro.elasticsearch.performanceanalyzer.PerformanceAnalyzerApp;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.grpc.FlowUnitMessage;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.grpc.Resource;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.metricsdb.MetricsDB;
Expand All @@ -35,6 +36,7 @@
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.summaries.HotResourceSummary;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.summaries.ResourceUtil;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.core.RcaConf;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.metrics.RcaVerticesMetrics;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.util.InstanceDetails;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.scheduler.FlowUnitOperationArgWrapper;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.store.rca.cluster.NodeKey;
Expand Down Expand Up @@ -145,6 +147,8 @@ public ResourceFlowUnit operate() {
&& exceedsSizeThreshold) {
context = new ResourceContext(Resources.State.UNHEALTHY);
nodeSummary.appendNestedSummary(cacheEvictionCollector.generateSummary(currTimestamp));
PerformanceAnalyzerApp.RCA_VERTICES_METRICS_AGGREGATOR.updateStat(
RcaVerticesMetrics.NUM_SHARD_REQUEST_CACHE_RCA_TRIGGERED, instanceDetails.getInstanceId().toString(), 1);
} else {
context = new ResourceContext(Resources.State.HEALTHY);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,15 @@

package com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.store.rca.cluster;

import com.amazon.opendistro.elasticsearch.performanceanalyzer.PerformanceAnalyzerApp;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.Rca;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.Resources;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.Resources.State;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.contexts.ResourceContext;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.flow_units.ResourceFlowUnit;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.summaries.HotClusterSummary;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.summaries.HotNodeSummary;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.metrics.RcaVerticesMetrics;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.util.InstanceDetails;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.scheduler.FlowUnitOperationArgWrapper;
import com.google.common.annotations.VisibleForTesting;
Expand Down Expand Up @@ -168,6 +170,8 @@ private ResourceFlowUnit<HotClusterSummary> generateFlowUnit() {
for (HotNodeSummary nodeSummary : unhealthyNodeSummaries) {
clusterSummary.appendNestedSummary(nodeSummary);
}
PerformanceAnalyzerApp.RCA_VERTICES_METRICS_AGGREGATOR.updateStat(
RcaVerticesMetrics.CLUSTER_RCA_NAMED_COUNT, this.getClass().getName(), 1);
return new ResourceFlowUnit<>(timestamp, new ResourceContext(Resources.State.UNHEALTHY), clusterSummary, true);
}
else {
Expand Down

0 comments on commit 378e0cb

Please sign in to comment.