diff --git a/core/src/main/java/org/apache/accumulo/core/conf/Property.java b/core/src/main/java/org/apache/accumulo/core/conf/Property.java
index 80a9b21b33f..fc5a52f2393 100644
--- a/core/src/main/java/org/apache/accumulo/core/conf/Property.java
+++ b/core/src/main/java/org/apache/accumulo/core/conf/Property.java
@@ -382,9 +382,6 @@ public enum Property {
MANAGER_WALOG_CLOSER_IMPLEMETATION("manager.walog.closer.implementation",
"org.apache.accumulo.server.manager.recovery.HadoopLogCloser", PropertyType.CLASSNAME,
"A class that implements a mechanism to steal write access to a write-ahead log.", "1.5.0"),
- @Deprecated
- MANAGER_FATE_METRICS_ENABLED("manager.fate.metrics.enabled", "true", PropertyType.BOOLEAN,
- "Enable reporting of FATE metrics in JMX (and logging with Hadoop Metrics2).", "1.9.3"),
MANAGER_FATE_METRICS_MIN_UPDATE_INTERVAL("manager.fate.metrics.min.update.interval", "60s",
PropertyType.TIMEDURATION, "Limit calls from metric sinks to zookeeper to update interval.",
"1.9.3"),
@@ -924,9 +921,6 @@ public enum Property {
+ " and possibly compacted. Legal values are: compact - which both flushes and compacts the"
+ " metadata; flush - which flushes only (compactions may be triggered if required); or none.",
"1.10.0"),
- @Deprecated
- GC_METRICS_ENABLED("gc.metrics.enabled", "true", PropertyType.BOOLEAN,
- "Enable detailed gc metrics reporting with hadoop metrics.", "1.10.0"),
// properties that are specific to the monitor server behavior
MONITOR_PREFIX("monitor.", null, PropertyType.PREFIX,
diff --git a/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java b/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java
index 954e6d98c35..3a368ef1c7f 100644
--- a/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java
+++ b/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java
@@ -314,37 +314,6 @@
*
|
*
*
- * queries |
- * Gauge |
- * {@value #METRICS_TSERVER_QUERIES} |
- * Gauge |
- * |
- *
- *
- * scannedRate |
- * Gauge |
- * {@value #METRICS_TSERVER_SCANNED_ENTRIES} |
- * Gauge |
- * Prior to 2.1.0 this metric was reported as a rate, it is now the count and the rate can be
- * derived |
- *
- *
- * queryRate |
- * Gauge |
- * {@value #METRICS_TSERVER_SCAN_RESULTS} |
- * Gauge |
- * Prior to 2.1.0 this metric was reported as a rate, it is now the count and the rate can be
- * derived |
- *
- *
- * queryByteRate |
- * Gauge |
- * {@value #METRICS_TSERVER_SCAN_RESULTS_BYTES} |
- * Gauge |
- * Prior to 2.1.0 this metric was reported as a rate, it is now the count and the rate can be
- * derived |
- *
- *
* ingestRate |
* Gauge |
* {@value #METRICS_TSERVER_INGEST_MUTATIONS} |
@@ -367,6 +336,28 @@
* Gauge |
* |
*
+ *
+ *
+ * N/A |
+ * N/A |
+ * {@value #METRICS_SCAN_RESERVATION_TIMER} |
+ * Timer |
+ * Time to reserve a tablets files for scan |
+ *
+ *
+ * N/A |
+ * N/A |
+ * {@value #METRICS_SCAN_BUSY_TIMEOUT_COUNTER} |
+ * Counter |
+ * Count of the scans where a busy timeout happened |
+ *
+ *
+ * N/A |
+ * N/A |
+ * {@value #METRICS_SCAN_TABLET_METADATA_CACHE} |
+ * Cache |
+ * scan server tablet cache metrics |
+ *
*
*
* scan |
@@ -418,12 +409,36 @@
* |
*
*
- * N/A |
- * N/A |
- * {@value #METRICS_SCAN_BUSY_TIMEOUT} |
- * Counter |
+ * queries |
+ * Gauge |
+ * {@value #METRICS_SCAN_QUERIES} |
+ * Gauge |
* |
*
+ *
+ * scannedRate |
+ * Gauge |
+ * {@value #METRICS_SCAN_SCANNED_ENTRIES} |
+ * Gauge |
+ * Prior to 2.1.0 this metric was reported as a rate, it is now the count and the rate can be
+ * derived |
+ *
+ *
+ * queryRate |
+ * Gauge |
+ * {@value #METRICS_SCAN_QUERY_SCAN_RESULTS} |
+ * Gauge |
+ * Prior to 2.1.0 this metric was reported as a rate, it is now the count and the rate can be
+ * derived |
+ *
+ *
+ * queryByteRate |
+ * Gauge |
+ * {@value #METRICS_SCAN_QUERY_SCAN_RESULTS_BYTES} |
+ * Gauge |
+ * Prior to 2.1.0 this metric was reported as a rate, it is now the count and the rate can be
+ * derived |
+ *
*
*
* {i|e}_{compactionServiceName}_{executor_name}_queued |
@@ -605,7 +620,7 @@ public interface MetricsProducer {
String METRICS_REPLICATION_PEERS = METRICS_REPLICATION_PREFIX + "peers";
String METRICS_REPLICATION_THREADS = METRICS_REPLICATION_PREFIX + "threads";
- String METRICS_SCAN_PREFIX = "accumulo.tserver.scans.";
+ String METRICS_SCAN_PREFIX = "accumulo.scan.";
String METRICS_SCAN_TIMES = METRICS_SCAN_PREFIX + "times";
String METRICS_SCAN_OPEN_FILES = METRICS_SCAN_PREFIX + "files.open";
String METRICS_SCAN_RESULTS = METRICS_SCAN_PREFIX + "result";
@@ -613,7 +628,14 @@ public interface MetricsProducer {
String METRICS_SCAN_START = METRICS_SCAN_PREFIX + "start";
String METRICS_SCAN_CONTINUE = METRICS_SCAN_PREFIX + "continue";
String METRICS_SCAN_CLOSE = METRICS_SCAN_PREFIX + "close";
- String METRICS_SCAN_BUSY_TIMEOUT = METRICS_SCAN_PREFIX + "busy.timeout";
+ String METRICS_SCAN_BUSY_TIMEOUT_COUNTER = METRICS_SCAN_PREFIX + "busy.timeout.count";
+ String METRICS_SCAN_RESERVATION_TIMER = METRICS_SCAN_PREFIX + "reservation.timer";
+ String METRICS_SCAN_QUERIES = METRICS_SCAN_PREFIX + "queries";
+ String METRICS_SCAN_QUERY_SCAN_RESULTS = METRICS_SCAN_PREFIX + "query.results";
+ String METRICS_SCAN_QUERY_SCAN_RESULTS_BYTES = METRICS_SCAN_PREFIX + "query.results.bytes";
+ String METRICS_SCAN_SCANNED_ENTRIES = METRICS_SCAN_PREFIX + "query.scanned.entries";
+
+ String METRICS_SCAN_TABLET_METADATA_CACHE = METRICS_SCAN_PREFIX + "tablet.metadata.cache";
String METRICS_TSERVER_PREFIX = "accumulo.tserver.";
String METRICS_TSERVER_ENTRIES = METRICS_TSERVER_PREFIX + "entries";
@@ -629,14 +651,10 @@ public interface MetricsProducer {
String METRICS_TSERVER_TABLETS_ONLINE = METRICS_TSERVER_PREFIX + "tablets.online";
String METRICS_TSERVER_TABLETS_OPENING = METRICS_TSERVER_PREFIX + "tablets.opening";
String METRICS_TSERVER_TABLETS_UNOPENED = METRICS_TSERVER_PREFIX + "tablets.unopened";
- String METRICS_TSERVER_QUERIES = METRICS_TSERVER_PREFIX + "queries";
String METRICS_TSERVER_TABLETS_FILES = METRICS_TSERVER_PREFIX + "tablets.files";
String METRICS_TSERVER_HOLD = METRICS_TSERVER_PREFIX + "hold";
String METRICS_TSERVER_INGEST_MUTATIONS = METRICS_TSERVER_PREFIX + "ingest.mutations";
String METRICS_TSERVER_INGEST_BYTES = METRICS_TSERVER_PREFIX + "ingest.bytes";
- String METRICS_TSERVER_SCAN_RESULTS = METRICS_TSERVER_PREFIX + "scan.results";
- String METRICS_TSERVER_SCAN_RESULTS_BYTES = METRICS_TSERVER_PREFIX + "scan.results.bytes";
- String METRICS_TSERVER_SCANNED_ENTRIES = METRICS_TSERVER_PREFIX + "scan.scanned.entries";
String METRICS_THRIFT_PREFIX = "accumulo.thrift.";
String METRICS_THRIFT_EXECUTE = METRICS_THRIFT_PREFIX + "execute";
@@ -673,7 +691,7 @@ default Map getMetricFields() {
fields.put((String) f.get(MetricsProducer.class), f.getName());
} catch (IllegalArgumentException | IllegalAccessException e) {
// this shouldn't happen, but let's log it anyway
- LOG.error("Error getting metric value for field: " + f.getName());
+ LOG.error("Error getting metric value for field: {}", f.getName());
}
}
}
diff --git a/server/compaction-coordinator/src/main/java/org/apache/accumulo/coordinator/CompactionCoordinator.java b/server/compaction-coordinator/src/main/java/org/apache/accumulo/coordinator/CompactionCoordinator.java
index 2b0dee53b7e..9ac4ddd27a5 100644
--- a/server/compaction-coordinator/src/main/java/org/apache/accumulo/coordinator/CompactionCoordinator.java
+++ b/server/compaction-coordinator/src/main/java/org/apache/accumulo/coordinator/CompactionCoordinator.java
@@ -100,6 +100,9 @@ public class CompactionCoordinator extends AbstractServer
implements CompactionCoordinatorService.Iface, LiveTServerSet.Listener {
private static final Logger LOG = LoggerFactory.getLogger(CompactionCoordinator.class);
+
+ private static final Logger STATUS_LOG =
+ LoggerFactory.getLogger(CompactionCoordinator.class.getName() + ".compaction.status");
private static final long TIME_BETWEEN_GC_CHECKS = 5000;
protected static final QueueSummaries QUEUE_SUMMARIES = new QueueSummaries();
@@ -585,8 +588,8 @@ public void updateCompactionStatus(TInfo tinfo, TCredentials credentials,
throw new AccumuloSecurityException(credentials.getPrincipal(),
SecurityErrorCode.PERMISSION_DENIED).asThriftException();
}
- LOG.debug("Compaction status update, id: {}, timestamp: {}, update: {}", externalCompactionId,
- timestamp, update);
+ STATUS_LOG.debug("Compaction status update, id: {}, timestamp: {}, update: {}",
+ externalCompactionId, timestamp, update);
final RunningCompaction rc = RUNNING_CACHE.get(ExternalCompactionId.of(externalCompactionId));
if (null != rc) {
rc.addUpdate(timestamp, update);
diff --git a/server/compaction-coordinator/src/main/java/org/apache/accumulo/coordinator/DeadCompactionDetector.java b/server/compaction-coordinator/src/main/java/org/apache/accumulo/coordinator/DeadCompactionDetector.java
index ba4a575ddfe..b58f06a31ee 100644
--- a/server/compaction-coordinator/src/main/java/org/apache/accumulo/coordinator/DeadCompactionDetector.java
+++ b/server/compaction-coordinator/src/main/java/org/apache/accumulo/coordinator/DeadCompactionDetector.java
@@ -117,8 +117,15 @@ private void detectDeadCompactions() {
});
tabletCompactions.forEach((ecid, extent) -> {
- log.debug("Possible dead compaction detected {} {}", ecid, extent);
- this.deadCompactions.merge(ecid, 1L, Long::sum);
+ var count = this.deadCompactions.merge(ecid, 1L, Long::sum);
+ if (count == 1) {
+ // The first time a possible dead compaction is seen, for quick compactions there is a good
+ // chance that it is already complete instead of dead. In order to avoid spamming the logs
+ // w/ false positives, log the first seen at trace.
+ log.trace("Possible dead compaction detected {} {} {}", ecid, extent, count);
+ } else {
+ log.debug("Possible dead compaction detected {} {} {}", ecid, extent, count);
+ }
});
// Everything left in tabletCompactions is no longer running anywhere and should be failed.
diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
index 81744441aa6..216526d328c 100644
--- a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
+++ b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
@@ -344,12 +344,17 @@ public void run() {
TServerConnection client =
manager.tserverSet.getConnection(location.getServerInstance());
if (client != null) {
- Manager.log.trace("[{}] Requesting TabletServer {} unload {} {}", store.name(),
- location.getServerInstance(), tls.extent, goal.howUnload());
- client.unloadTablet(manager.managerLock, tls.extent, goal.howUnload(),
- manager.getSteadyTime());
- unloaded++;
- totalUnloaded++;
+ try {
+ Manager.log.trace("[{}] Requesting TabletServer {} unload {} {}", store.name(),
+ location.getServerInstance(), tls.extent, goal.howUnload());
+ client.unloadTablet(manager.managerLock, tls.extent, goal.howUnload(),
+ manager.getSteadyTime());
+ unloaded++;
+ totalUnloaded++;
+ } catch (TException tException) {
+ Manager.log.warn("[{}] Failed to request tablet unload {} {} {}", store.name(),
+ location.getServerInstance(), tls.extent, goal.howUnload(), tException);
+ }
} else {
Manager.log.warn("Could not connect to server {}", location);
}
@@ -1036,13 +1041,19 @@ private void flushChanges(TabletLists tLists, WalStateManager wals)
}
tLists.assignments.addAll(tLists.assigned);
for (Assignment a : tLists.assignments) {
- TServerConnection client = manager.tserverSet.getConnection(a.server);
- if (client != null) {
- client.assignTablet(manager.managerLock, a.tablet);
- } else {
- Manager.log.warn("Could not connect to server {}", a.server);
+ try {
+ TServerConnection client = manager.tserverSet.getConnection(a.server);
+ if (client != null) {
+ client.assignTablet(manager.managerLock, a.tablet);
+ manager.assignedTablet(a.tablet);
+ } else {
+ Manager.log.warn("Could not connect to server {} for assignment of {}", a.server,
+ a.tablet);
+ }
+ } catch (TException tException) {
+ Manager.log.warn("Could not connect to server {} for assignment of {}", a.server, a.tablet,
+ tException);
}
- manager.assignedTablet(a.tablet);
}
}
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java
index bcba26aa135..94186ed38f3 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java
@@ -76,6 +76,7 @@
import org.apache.accumulo.core.tabletserver.thrift.ActiveScan;
import org.apache.accumulo.core.tabletserver.thrift.NoSuchScanIDException;
import org.apache.accumulo.core.tabletserver.thrift.NotServingTabletException;
+import org.apache.accumulo.core.tabletserver.thrift.ScanServerBusyException;
import org.apache.accumulo.core.tabletserver.thrift.TSampleNotPresentException;
import org.apache.accumulo.core.tabletserver.thrift.TSamplerConfiguration;
import org.apache.accumulo.core.tabletserver.thrift.TabletScanClientService;
@@ -122,6 +123,8 @@
import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
+import io.micrometer.core.instrument.Tag;
+
public class ScanServer extends AbstractServer
implements TabletScanClientService.Iface, TabletHostingServer {
@@ -199,6 +202,7 @@ private TabletMetadataLoader(Ample ample) {
private volatile boolean serverStopRequested = false;
private ServiceLock scanServerLock;
protected TabletServerScanMetrics scanMetrics;
+ private ScanServerMetrics scanServerMetrics;
private ZooCache managerLockCache;
@@ -243,7 +247,7 @@ public ScanServer(ScanServerOpts opts, String[] args) {
}
tabletMetadataCache =
Caffeine.newBuilder().expireAfterWrite(cacheExpiration, TimeUnit.MILLISECONDS)
- .scheduler(Scheduler.systemScheduler()).build(tabletMetadataLoader);
+ .scheduler(Scheduler.systemScheduler()).recordStats().build(tabletMetadataLoader);
}
delegate = newThriftScanClientHandler(new WriteTracker());
@@ -338,6 +342,7 @@ public void unableToMonitorLockNode(final Exception e) {
// Don't use the normal ServerServices lock content, instead put the server UUID here.
byte[] lockContent = (serverLockUUID.toString() + "," + groupName).getBytes(UTF_8);
+ // wait for 120 seconds with 5 second delay
for (int i = 0; i < 120 / 5; i++) {
zoo.putPersistentData(zLockPath.toString(), new byte[0], NodeExistsPolicy.SKIP);
@@ -371,10 +376,12 @@ public void run() {
MetricsInfo metricsInfo = getContext().getMetricsInfo();
metricsInfo.addServiceTags(getApplicationName(), clientAddress);
+ metricsInfo.addCommonTags(List.of(Tag.of("resource.group", groupName)));
scanMetrics = new TabletServerScanMetrics();
+ scanServerMetrics = new ScanServerMetrics(tabletMetadataCache);
- metricsInfo.addMetricsProducers(scanMetrics);
+ metricsInfo.addMetricsProducers(scanMetrics, scanServerMetrics);
metricsInfo.init();
// We need to set the compaction manager so that we don't get an NPE in CompactableImpl.close
@@ -657,6 +664,19 @@ private Map reserveFilesInner(Collection ex
}
}
+ @VisibleForTesting
+ ScanReservation reserveFilesInstrumented(Map> extents)
+ throws AccumuloException {
+ long start = System.nanoTime();
+ try {
+ return reserveFiles(extents);
+ } finally {
+ scanServerMetrics.getReservationTimer().record(System.nanoTime() - start,
+ TimeUnit.NANOSECONDS);
+ }
+
+ }
+
protected ScanReservation reserveFiles(Map> extents)
throws AccumuloException {
@@ -687,6 +707,16 @@ protected ScanReservation reserveFiles(Map> extents)
return new ScanReservation(tabletsMetadata, myReservationId, failures);
}
+ private ScanReservation reserveFilesInstrumented(long scanId) throws NoSuchScanIDException {
+ long start = System.nanoTime();
+ try {
+ return reserveFiles(scanId);
+ } finally {
+ scanServerMetrics.getReservationTimer().record(System.nanoTime() - start,
+ TimeUnit.NANOSECONDS);
+ }
+ }
+
protected ScanReservation reserveFiles(long scanId) throws NoSuchScanIDException {
var session = (ScanSession) sessionManager.getSession(scanId);
if (session == null) {
@@ -875,7 +905,7 @@ public InitialScan startScan(TInfo tinfo, TCredentials credentials, TKeyExtent t
KeyExtent extent = getKeyExtent(textent);
try (ScanReservation reservation =
- reserveFiles(Map.of(extent, Collections.singletonList(range)))) {
+ reserveFilesInstrumented(Map.of(extent, Collections.singletonList(range)))) {
if (reservation.getFailures().containsKey(textent)) {
throw new NotServingTabletException(extent.toThrift());
@@ -889,7 +919,9 @@ batchTimeOut, classLoaderContext, executionHints, getScanTabletResolver(tablet),
busyTimeout);
return is;
-
+ } catch (ScanServerBusyException be) {
+ scanServerMetrics.incrementBusy();
+ throw be;
} catch (AccumuloException | IOException e) {
LOG.error("Error starting scan", e);
throw new RuntimeException(e);
@@ -905,6 +937,9 @@ public ScanResult continueScan(TInfo tinfo, long scanID, long busyTimeout)
try (ScanReservation reservation = reserveFiles(scanID)) {
Preconditions.checkState(reservation.getFailures().isEmpty());
return delegate.continueScan(tinfo, scanID, busyTimeout);
+ } catch (ScanServerBusyException be) {
+ scanServerMetrics.incrementBusy();
+ throw be;
}
}
@@ -933,7 +968,7 @@ public InitialMultiScan startMultiScan(TInfo tinfo, TCredentials credentials,
batch.put(extent, entry.getValue());
}
- try (ScanReservation reservation = reserveFiles(batch)) {
+ try (ScanReservation reservation = reserveFilesInstrumented(batch)) {
HashMap tablets = new HashMap<>();
reservation.getTabletMetadataExtents().forEach(extent -> {
@@ -950,6 +985,9 @@ public InitialMultiScan startMultiScan(TInfo tinfo, TCredentials credentials,
LOG.trace("started scan: {}", ims.getScanID());
return ims;
+ } catch (ScanServerBusyException be) {
+ scanServerMetrics.incrementBusy();
+ throw be;
} catch (TException e) {
LOG.error("Error starting scan", e);
throw e;
@@ -967,6 +1005,9 @@ public MultiScanResult continueMultiScan(TInfo tinfo, long scanID, long busyTime
try (ScanReservation reservation = reserveFiles(scanID)) {
Preconditions.checkState(reservation.getFailures().isEmpty());
return delegate.continueMultiScan(tinfo, scanID, busyTimeout);
+ } catch (ScanServerBusyException be) {
+ scanServerMetrics.incrementBusy();
+ throw be;
}
}
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServerMetrics.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServerMetrics.java
new file mode 100644
index 00000000000..1a516b597bb
--- /dev/null
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServerMetrics.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.accumulo.tserver;
+
+import org.apache.accumulo.core.dataImpl.KeyExtent;
+import org.apache.accumulo.core.metadata.schema.TabletMetadata;
+import org.apache.accumulo.core.metrics.MetricsProducer;
+
+import com.github.benmanes.caffeine.cache.LoadingCache;
+
+import io.micrometer.core.instrument.Counter;
+import io.micrometer.core.instrument.MeterRegistry;
+import io.micrometer.core.instrument.Timer;
+import io.micrometer.core.instrument.binder.cache.CaffeineCacheMetrics;
+
+public class ScanServerMetrics implements MetricsProducer {
+
+ private Timer reservationTimer;
+ private Counter busyTimeoutCount;
+
+ private final LoadingCache tabletMetadataCache;
+
+ public ScanServerMetrics(final LoadingCache tabletMetadataCache) {
+ this.tabletMetadataCache = tabletMetadataCache;
+ }
+
+ @Override
+ public void registerMetrics(MeterRegistry registry) {
+ reservationTimer = Timer.builder(MetricsProducer.METRICS_SCAN_RESERVATION_TIMER)
+ .description("Time to reserve a tablets files for scan").register(registry);
+ busyTimeoutCount = Counter.builder(METRICS_SCAN_BUSY_TIMEOUT_COUNTER)
+ .description("The number of scans where a busy timeout happened").register(registry);
+ CaffeineCacheMetrics.monitor(registry, tabletMetadataCache, METRICS_SCAN_TABLET_METADATA_CACHE);
+ }
+
+ public Timer getReservationTimer() {
+ return reservationTimer;
+ }
+
+ public void incrementBusy() {
+ busyTimeoutCount.increment();
+ }
+}
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/ThriftScanClientHandler.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/ThriftScanClientHandler.java
index d1ff17347ae..8a99b2315e4 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/ThriftScanClientHandler.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/ThriftScanClientHandler.java
@@ -284,7 +284,7 @@ protected ScanResult continueScan(TInfo tinfo, long scanID, SingleScanSession sc
server.getSessionManager().removeSession(scanID);
TabletBase tablet = scanSession.getTabletResolver().getTablet(scanSession.extent);
if (busyTimeout > 0) {
- server.getScanMetrics().incrementScanBusyTimeout(1.0D);
+ server.getScanMetrics().incrementBusy(1.0D);
throw new ScanServerBusyException();
} else if (tablet == null || tablet.isClosed()) {
throw new NotServingTabletException(scanSession.extent.toThrift());
@@ -495,7 +495,7 @@ private MultiScanResult continueMultiScan(long scanID, MultiScanSession session,
} catch (CancellationException ce) {
server.getSessionManager().removeSession(scanID);
if (busyTimeout > 0) {
- server.getScanMetrics().incrementScanBusyTimeout(1.0D);
+ server.getScanMetrics().incrementBusy(1.0D);
throw new ScanServerBusyException();
} else {
log.warn("Failed to get multiscan result", ce);
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/metrics/TabletServerMetricsUtil.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/metrics/TabletServerMetricsUtil.java
index f508a6871ed..599065d7c76 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/metrics/TabletServerMetricsUtil.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/metrics/TabletServerMetricsUtil.java
@@ -24,8 +24,6 @@
/**
* Wrapper around extracting metrics from a TabletServer instance
- *
- * Necessary to support both old custom JMX metrics and Hadoop Metrics2
*/
public class TabletServerMetricsUtil {
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/metrics/TabletServerScanMetrics.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/metrics/TabletServerScanMetrics.java
index 492a6a8c803..8e066dd7f71 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/metrics/TabletServerScanMetrics.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/metrics/TabletServerScanMetrics.java
@@ -39,7 +39,7 @@ public class TabletServerScanMetrics implements MetricsProducer {
private Counter startScanCalls;
private Counter continueScanCalls;
private Counter closeScanCalls;
- private Counter busyTimeoutReturned;
+ private Counter busyTimeoutCount;
private final LongAdder lookupCount = new LongAdder();
private final LongAdder queryResultCount = new LongAdder();
@@ -114,8 +114,8 @@ public void incrementCloseScan(double value) {
closeScanCalls.increment(value);
}
- public void incrementScanBusyTimeout(double value) {
- busyTimeoutReturned.increment(value);
+ public void incrementBusy(double value) {
+ busyTimeoutCount.increment(value);
}
@Override
@@ -133,17 +133,19 @@ public void registerMetrics(MeterRegistry registry) {
.description("calls to continue a scan / multiscan").register(registry);
closeScanCalls = Counter.builder(METRICS_SCAN_CLOSE)
.description("calls to close a scan / multiscan").register(registry);
- busyTimeoutReturned = Counter.builder(METRICS_SCAN_BUSY_TIMEOUT)
- .description("times that a scan has timed out in the queue").register(registry);
- Gauge.builder(METRICS_TSERVER_QUERIES, this, TabletServerScanMetrics::getLookupCount)
+ busyTimeoutCount = Counter.builder(METRICS_SCAN_BUSY_TIMEOUT_COUNTER)
+ .description("The number of scans where a busy timeout happened").register(registry);
+ Gauge.builder(METRICS_SCAN_QUERIES, this, TabletServerScanMetrics::getLookupCount)
.description("Number of queries").register(registry);
- Gauge.builder(METRICS_TSERVER_SCAN_RESULTS, this, TabletServerScanMetrics::getQueryResultCount)
+ Gauge
+ .builder(METRICS_SCAN_QUERY_SCAN_RESULTS, this,
+ TabletServerScanMetrics::getQueryResultCount)
.description("Query rate (entries/sec)").register(registry);
Gauge
- .builder(METRICS_TSERVER_SCAN_RESULTS_BYTES, this,
+ .builder(METRICS_SCAN_QUERY_SCAN_RESULTS_BYTES, this,
TabletServerScanMetrics::getQueryByteCount)
.description("Query rate (bytes/sec)").register(registry);
- Gauge.builder(METRICS_TSERVER_SCANNED_ENTRIES, this, TabletServerScanMetrics::getScannedCount)
+ Gauge.builder(METRICS_SCAN_SCANNED_ENTRIES, this, TabletServerScanMetrics::getScannedCount)
.description("Scanned rate").register(registry);
}
diff --git a/server/tserver/src/test/java/org/apache/accumulo/tserver/ScanServerTest.java b/server/tserver/src/test/java/org/apache/accumulo/tserver/ScanServerTest.java
index fdb79e1b00f..b7f64e05241 100644
--- a/server/tserver/src/test/java/org/apache/accumulo/tserver/ScanServerTest.java
+++ b/server/tserver/src/test/java/org/apache/accumulo/tserver/ScanServerTest.java
@@ -101,6 +101,10 @@ protected ScanReservation reserveFiles(long scanId) throws NoSuchScanIDException
return reservation;
}
+ @Override
+ ScanReservation reserveFilesInstrumented(Map> extents) {
+ return reservation;
+ }
}
private ThriftScanClientHandler handler;
diff --git a/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java b/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java
index aa7bae035a9..0782adb70df 100644
--- a/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java
@@ -100,9 +100,12 @@ public void confirmMetricsPublished() throws Exception {
cluster.stop();
Set unexpectedMetrics = Set.of(METRICS_SCAN_YIELDS, METRICS_UPDATE_ERRORS,
- METRICS_REPLICATION_QUEUE, METRICS_COMPACTOR_MAJC_STUCK, METRICS_SCAN_BUSY_TIMEOUT);
+ METRICS_REPLICATION_QUEUE, METRICS_COMPACTOR_MAJC_STUCK, METRICS_SCAN_BUSY_TIMEOUT_COUNTER);
+ // add sserver as flaky until scan server included in mini tests.
Set flakyMetrics = Set.of(METRICS_GC_WAL_ERRORS, METRICS_FATE_TYPE_IN_PROGRESS,
- METRICS_MANAGER_BALANCER_NEED_MIGRATION, METRICS_MANAGER_BALANCER_MIGRATING);
+ METRICS_SCAN_BUSY_TIMEOUT_COUNTER, METRICS_SCAN_RESERVATION_TIMER,
+ METRICS_SCAN_TABLET_METADATA_CACHE, METRICS_MANAGER_BALANCER_NEED_MIGRATION,
+ METRICS_MANAGER_BALANCER_MIGRATING);
Map expectedMetricNames = this.getMetricFields();
flakyMetrics.forEach(expectedMetricNames::remove); // might not see these