Skip to content

Commit

Permalink
merge latest 2.1 changes
Browse files Browse the repository at this point in the history
  • Loading branch information
Ed Coleman committed May 3, 2024
2 parents d0d9997 + c488f78 commit a59090e
Show file tree
Hide file tree
Showing 12 changed files with 224 additions and 84 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -382,9 +382,6 @@ public enum Property {
MANAGER_WALOG_CLOSER_IMPLEMETATION("manager.walog.closer.implementation",
"org.apache.accumulo.server.manager.recovery.HadoopLogCloser", PropertyType.CLASSNAME,
"A class that implements a mechanism to steal write access to a write-ahead log.", "1.5.0"),
@Deprecated
MANAGER_FATE_METRICS_ENABLED("manager.fate.metrics.enabled", "true", PropertyType.BOOLEAN,
"Enable reporting of FATE metrics in JMX (and logging with Hadoop Metrics2).", "1.9.3"),
MANAGER_FATE_METRICS_MIN_UPDATE_INTERVAL("manager.fate.metrics.min.update.interval", "60s",
PropertyType.TIMEDURATION, "Limit calls from metric sinks to zookeeper to update interval.",
"1.9.3"),
Expand Down Expand Up @@ -924,9 +921,6 @@ public enum Property {
+ " and possibly compacted. Legal values are: compact - which both flushes and compacts the"
+ " metadata; flush - which flushes only (compactions may be triggered if required); or none.",
"1.10.0"),
@Deprecated
GC_METRICS_ENABLED("gc.metrics.enabled", "true", PropertyType.BOOLEAN,
"Enable detailed gc metrics reporting with hadoop metrics.", "1.10.0"),

// properties that are specific to the monitor server behavior
MONITOR_PREFIX("monitor.", null, PropertyType.PREFIX,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -314,37 +314,6 @@
* <td></td>
* </tr>
* <tr>
* <td>queries</td>
* <td>Gauge</td>
* <td>{@value #METRICS_TSERVER_QUERIES}</td>
* <td>Gauge</td>
* <td></td>
* </tr>
* <tr>
* <td>scannedRate</td>
* <td>Gauge</td>
* <td>{@value #METRICS_TSERVER_SCANNED_ENTRIES}</td>
* <td>Gauge</td>
* <td>Prior to 2.1.0 this metric was reported as a rate; it is now the count and the rate can be
* derived</td>
* </tr>
* <tr>
* <td>queryRate</td>
* <td>Gauge</td>
* <td>{@value #METRICS_TSERVER_SCAN_RESULTS}</td>
* <td>Gauge</td>
* <td>Prior to 2.1.0 this metric was reported as a rate; it is now the count and the rate can be
* derived</td>
* </tr>
* <tr>
* <td>queryByteRate</td>
* <td>Gauge</td>
* <td>{@value #METRICS_TSERVER_SCAN_RESULTS_BYTES}</td>
* <td>Gauge</td>
* <td>Prior to 2.1.0 this metric was reported as a rate; it is now the count and the rate can be
* derived</td>
* </tr>
* <tr>
* <td>ingestRate</td>
* <td>Gauge</td>
* <td>{@value #METRICS_TSERVER_INGEST_MUTATIONS}</td>
Expand All @@ -367,6 +336,28 @@
* <td>Gauge</td>
* <td></td>
* </tr>
* <!-- scan server -->
* <tr>
* <th>N/A</th>
* <th>N/A</th>
* <th>{@value #METRICS_SCAN_RESERVATION_TIMER}</th>
* <th>Timer</th>
* <th>Time to reserve a tablet's files for scan</th>
* </tr>
* <tr>
* <th>N/A</th>
* <th>N/A</th>
* <th>{@value #METRICS_SCAN_BUSY_TIMEOUT_COUNTER}</th>
* <th>Counter</th>
* <th>Count of the scans where a busy timeout happened</th>
* </tr>
* <tr>
* <th>N/A</th>
* <th>N/A</th>
* <th>{@value #METRICS_SCAN_TABLET_METADATA_CACHE}</th>
* <th>Cache</th>
* <th>Scan server tablet metadata cache metrics</th>
* </tr>
* <!-- scans -->
* <tr>
* <td>scan</td>
Expand Down Expand Up @@ -418,12 +409,36 @@
* <td></td>
* </tr>
* <tr>
* <td>N/A</td>
* <td>N/A</td>
* <td>{@value #METRICS_SCAN_BUSY_TIMEOUT}</td>
* <td>Counter</td>
* <td>queries</td>
* <td>Gauge</td>
* <td>{@value #METRICS_SCAN_QUERIES}</td>
* <td>Gauge</td>
* <td></td>
* </tr>
* <tr>
* <td>scannedRate</td>
* <td>Gauge</td>
* <td>{@value #METRICS_SCAN_SCANNED_ENTRIES}</td>
* <td>Gauge</td>
* <td>Prior to 2.1.0 this metric was reported as a rate; it is now the count and the rate can be
* derived</td>
* </tr>
* <tr>
* <td>queryRate</td>
* <td>Gauge</td>
* <td>{@value #METRICS_SCAN_QUERY_SCAN_RESULTS}</td>
* <td>Gauge</td>
* <td>Prior to 2.1.0 this metric was reported as a rate; it is now the count and the rate can be
* derived</td>
* </tr>
* <tr>
* <td>queryByteRate</td>
* <td>Gauge</td>
* <td>{@value #METRICS_SCAN_QUERY_SCAN_RESULTS_BYTES}</td>
* <td>Gauge</td>
* <td>Prior to 2.1.0 this metric was reported as a rate; it is now the count and the rate can be
* derived</td>
* </tr>
* <!-- major compactions -->
* <tr>
* <td>{i|e}_{compactionServiceName}_{executor_name}_queued</td>
Expand Down Expand Up @@ -605,15 +620,22 @@ public interface MetricsProducer {
String METRICS_REPLICATION_PEERS = METRICS_REPLICATION_PREFIX + "peers";
String METRICS_REPLICATION_THREADS = METRICS_REPLICATION_PREFIX + "threads";

String METRICS_SCAN_PREFIX = "accumulo.tserver.scans.";
String METRICS_SCAN_PREFIX = "accumulo.scan.";
String METRICS_SCAN_TIMES = METRICS_SCAN_PREFIX + "times";
String METRICS_SCAN_OPEN_FILES = METRICS_SCAN_PREFIX + "files.open";
String METRICS_SCAN_RESULTS = METRICS_SCAN_PREFIX + "result";
String METRICS_SCAN_YIELDS = METRICS_SCAN_PREFIX + "yields";
String METRICS_SCAN_START = METRICS_SCAN_PREFIX + "start";
String METRICS_SCAN_CONTINUE = METRICS_SCAN_PREFIX + "continue";
String METRICS_SCAN_CLOSE = METRICS_SCAN_PREFIX + "close";
String METRICS_SCAN_BUSY_TIMEOUT = METRICS_SCAN_PREFIX + "busy.timeout";
String METRICS_SCAN_BUSY_TIMEOUT_COUNTER = METRICS_SCAN_PREFIX + "busy.timeout.count";
String METRICS_SCAN_RESERVATION_TIMER = METRICS_SCAN_PREFIX + "reservation.timer";
String METRICS_SCAN_QUERIES = METRICS_SCAN_PREFIX + "queries";
String METRICS_SCAN_QUERY_SCAN_RESULTS = METRICS_SCAN_PREFIX + "query.results";
String METRICS_SCAN_QUERY_SCAN_RESULTS_BYTES = METRICS_SCAN_PREFIX + "query.results.bytes";
String METRICS_SCAN_SCANNED_ENTRIES = METRICS_SCAN_PREFIX + "query.scanned.entries";

String METRICS_SCAN_TABLET_METADATA_CACHE = METRICS_SCAN_PREFIX + "tablet.metadata.cache";

String METRICS_TSERVER_PREFIX = "accumulo.tserver.";
String METRICS_TSERVER_ENTRIES = METRICS_TSERVER_PREFIX + "entries";
Expand All @@ -629,14 +651,10 @@ public interface MetricsProducer {
String METRICS_TSERVER_TABLETS_ONLINE = METRICS_TSERVER_PREFIX + "tablets.online";
String METRICS_TSERVER_TABLETS_OPENING = METRICS_TSERVER_PREFIX + "tablets.opening";
String METRICS_TSERVER_TABLETS_UNOPENED = METRICS_TSERVER_PREFIX + "tablets.unopened";
String METRICS_TSERVER_QUERIES = METRICS_TSERVER_PREFIX + "queries";
String METRICS_TSERVER_TABLETS_FILES = METRICS_TSERVER_PREFIX + "tablets.files";
String METRICS_TSERVER_HOLD = METRICS_TSERVER_PREFIX + "hold";
String METRICS_TSERVER_INGEST_MUTATIONS = METRICS_TSERVER_PREFIX + "ingest.mutations";
String METRICS_TSERVER_INGEST_BYTES = METRICS_TSERVER_PREFIX + "ingest.bytes";
String METRICS_TSERVER_SCAN_RESULTS = METRICS_TSERVER_PREFIX + "scan.results";
String METRICS_TSERVER_SCAN_RESULTS_BYTES = METRICS_TSERVER_PREFIX + "scan.results.bytes";
String METRICS_TSERVER_SCANNED_ENTRIES = METRICS_TSERVER_PREFIX + "scan.scanned.entries";

String METRICS_THRIFT_PREFIX = "accumulo.thrift.";
String METRICS_THRIFT_EXECUTE = METRICS_THRIFT_PREFIX + "execute";
Expand Down Expand Up @@ -673,7 +691,7 @@ default Map<String,String> getMetricFields() {
fields.put((String) f.get(MetricsProducer.class), f.getName());
} catch (IllegalArgumentException | IllegalAccessException e) {
// this shouldn't happen, but let's log it anyway
LOG.error("Error getting metric value for field: " + f.getName());
LOG.error("Error getting metric value for field: {}", f.getName());
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@ public class CompactionCoordinator extends AbstractServer
implements CompactionCoordinatorService.Iface, LiveTServerSet.Listener {

private static final Logger LOG = LoggerFactory.getLogger(CompactionCoordinator.class);

private static final Logger STATUS_LOG =
LoggerFactory.getLogger(CompactionCoordinator.class.getName() + ".compaction.status");
private static final long TIME_BETWEEN_GC_CHECKS = 5000;
protected static final QueueSummaries QUEUE_SUMMARIES = new QueueSummaries();

Expand Down Expand Up @@ -585,8 +588,8 @@ public void updateCompactionStatus(TInfo tinfo, TCredentials credentials,
throw new AccumuloSecurityException(credentials.getPrincipal(),
SecurityErrorCode.PERMISSION_DENIED).asThriftException();
}
LOG.debug("Compaction status update, id: {}, timestamp: {}, update: {}", externalCompactionId,
timestamp, update);
STATUS_LOG.debug("Compaction status update, id: {}, timestamp: {}, update: {}",
externalCompactionId, timestamp, update);
final RunningCompaction rc = RUNNING_CACHE.get(ExternalCompactionId.of(externalCompactionId));
if (null != rc) {
rc.addUpdate(timestamp, update);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,15 @@ private void detectDeadCompactions() {
});

tabletCompactions.forEach((ecid, extent) -> {
log.debug("Possible dead compaction detected {} {}", ecid, extent);
this.deadCompactions.merge(ecid, 1L, Long::sum);
var count = this.deadCompactions.merge(ecid, 1L, Long::sum);
if (count == 1) {
// The first time a possible dead compaction is seen, there is a good chance (especially for
// quick compactions) that it has already completed rather than died. To avoid spamming the
// logs with false positives, log the first sighting at trace level.
log.trace("Possible dead compaction detected {} {} {}", ecid, extent, count);
} else {
log.debug("Possible dead compaction detected {} {} {}", ecid, extent, count);
}
});

// Everything left in tabletCompactions is no longer running anywhere and should be failed.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -344,12 +344,17 @@ public void run() {
TServerConnection client =
manager.tserverSet.getConnection(location.getServerInstance());
if (client != null) {
Manager.log.trace("[{}] Requesting TabletServer {} unload {} {}", store.name(),
location.getServerInstance(), tls.extent, goal.howUnload());
client.unloadTablet(manager.managerLock, tls.extent, goal.howUnload(),
manager.getSteadyTime());
unloaded++;
totalUnloaded++;
try {
Manager.log.trace("[{}] Requesting TabletServer {} unload {} {}", store.name(),
location.getServerInstance(), tls.extent, goal.howUnload());
client.unloadTablet(manager.managerLock, tls.extent, goal.howUnload(),
manager.getSteadyTime());
unloaded++;
totalUnloaded++;
} catch (TException tException) {
Manager.log.warn("[{}] Failed to request tablet unload {} {} {}", store.name(),
location.getServerInstance(), tls.extent, goal.howUnload(), tException);
}
} else {
Manager.log.warn("Could not connect to server {}", location);
}
Expand Down Expand Up @@ -1036,13 +1041,19 @@ private void flushChanges(TabletLists tLists, WalStateManager wals)
}
tLists.assignments.addAll(tLists.assigned);
for (Assignment a : tLists.assignments) {
TServerConnection client = manager.tserverSet.getConnection(a.server);
if (client != null) {
client.assignTablet(manager.managerLock, a.tablet);
} else {
Manager.log.warn("Could not connect to server {}", a.server);
try {
TServerConnection client = manager.tserverSet.getConnection(a.server);
if (client != null) {
client.assignTablet(manager.managerLock, a.tablet);
manager.assignedTablet(a.tablet);
} else {
Manager.log.warn("Could not connect to server {} for assignment of {}", a.server,
a.tablet);
}
} catch (TException tException) {
Manager.log.warn("Could not connect to server {} for assignment of {}", a.server, a.tablet,
tException);
}
manager.assignedTablet(a.tablet);
}
}

Expand Down
Loading

0 comments on commit a59090e

Please sign in to comment.