From 202874594185c7497960a43262b12d3ab9d78d7d Mon Sep 17 00:00:00 2001 From: Keith Turner Date: Tue, 24 Oct 2023 20:50:18 -0400 Subject: [PATCH 01/12] logs row ranges for files that have them (#3885) There was code that was logging only tablets file names. Now that files can have a row range, need to log the range if it exists. --- .../accumulo/core/logging/TabletLogger.java | 16 +++++++------- .../core/metadata/AbstractTabletFile.java | 21 +++++++++++++++++++ .../core/metadata/CompactableFileImpl.java | 2 +- .../accumulo/core/metadata/TabletFile.java | 5 +++++ .../tserver/tablet/CompactableImpl.java | 15 ++++++------- .../apache/accumulo/test/ComprehensiveIT.java | 1 - 6 files changed, 44 insertions(+), 16 deletions(-) diff --git a/core/src/main/java/org/apache/accumulo/core/logging/TabletLogger.java b/core/src/main/java/org/apache/accumulo/core/logging/TabletLogger.java index 100cc318ccf..e632971cf5d 100644 --- a/core/src/main/java/org/apache/accumulo/core/logging/TabletLogger.java +++ b/core/src/main/java/org/apache/accumulo/core/logging/TabletLogger.java @@ -28,6 +28,7 @@ import org.apache.accumulo.core.client.admin.CompactionConfig; import org.apache.accumulo.core.client.admin.compaction.CompactableFile; import org.apache.accumulo.core.dataImpl.KeyExtent; +import org.apache.accumulo.core.metadata.CompactableFileImpl; import org.apache.accumulo.core.metadata.StoredTabletFile; import org.apache.accumulo.core.metadata.TServerInstance; import org.apache.accumulo.core.metadata.TabletFile; @@ -117,32 +118,33 @@ private static String getSize(Collection files) { * Lazily converts TableFile to file names. The lazy part is really important because when it is * not called with log.isDebugEnabled(). */ - private static Collection asFileNames(Collection files) { - return Collections2.transform(files, CompactableFile::getFileName); + private static Collection asMinimalString(Collection files) { + return Collections2.transform(files, + cf -> CompactableFileImpl.toStoredTabletFile(cf).toMinimalString()); } public static void selected(KeyExtent extent, CompactionKind kind, Collection inputs) { fileLog.trace("{} changed compaction selection set for {} new set {}", extent, kind, - Collections2.transform(inputs, StoredTabletFile::getFileName)); + Collections2.transform(inputs, StoredTabletFile::toMinimalString)); } public static void compacting(KeyExtent extent, CompactionJob job, CompactionConfig config) { if (fileLog.isDebugEnabled()) { if (config == null) { fileLog.debug("Compacting {} on {} for {} from {} size {}", extent, job.getExecutor(), - job.getKind(), asFileNames(job.getFiles()), getSize(job.getFiles())); + job.getKind(), asMinimalString(job.getFiles()), getSize(job.getFiles())); } else { fileLog.debug("Compacting {} on {} for {} from {} size {} config {}", extent, - job.getExecutor(), job.getKind(), asFileNames(job.getFiles()), getSize(job.getFiles()), - config); + job.getExecutor(), job.getKind(), asMinimalString(job.getFiles()), + getSize(job.getFiles()), config); } } } public static void compacted(KeyExtent extent, CompactionJob job, StoredTabletFile output) { fileLog.debug("Compacted {} for {} created {} from {}", extent, job.getKind(), output, - asFileNames(job.getFiles())); + asMinimalString(job.getFiles())); } public static void flushed(KeyExtent extent, Optional newDatafile) { diff --git a/core/src/main/java/org/apache/accumulo/core/metadata/AbstractTabletFile.java b/core/src/main/java/org/apache/accumulo/core/metadata/AbstractTabletFile.java index 0957c07a9d0..ae0d46fbf21 100644 --- a/core/src/main/java/org/apache/accumulo/core/metadata/AbstractTabletFile.java +++ b/core/src/main/java/org/apache/accumulo/core/metadata/AbstractTabletFile.java @@ -20,6 +20,7 @@ import java.util.Objects; +import org.apache.accumulo.core.data.ByteSequence; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Range; import org.apache.hadoop.fs.Path; @@ -82,4 +83,24 @@ private static boolean isExclusiveKey(Key key) { return row.length() > 0 && row.byteAt(row.length() - 1) == (byte) 0x00; } + private static String stripZeroTail(ByteSequence row) { + if (row.byteAt(row.length() - 1) == (byte) 0x00) { + return row.subSequence(0, row.length() - 1).toString(); + } + return row.toString(); + } + + @Override + public String toMinimalString() { + if (hasRange()) { + String startRow = + range.isInfiniteStartKey() ? "-inf" : stripZeroTail(range.getStartKey().getRowData()); + String endRow = + range.isInfiniteStopKey() ? "+inf" : stripZeroTail(range.getEndKey().getRowData()); + return getFileName() + " (" + startRow + "," + endRow + "]"; + } else { + return getFileName(); + } + } + } diff --git a/core/src/main/java/org/apache/accumulo/core/metadata/CompactableFileImpl.java b/core/src/main/java/org/apache/accumulo/core/metadata/CompactableFileImpl.java index 9944245f970..e3ff1a334ae 100644 --- a/core/src/main/java/org/apache/accumulo/core/metadata/CompactableFileImpl.java +++ b/core/src/main/java/org/apache/accumulo/core/metadata/CompactableFileImpl.java @@ -89,6 +89,6 @@ public static StoredTabletFile toStoredTabletFile(CompactableFile cf) { @Override public String toString() { - return "[" + storedTabletFile.getFileName() + ", " + dataFileValue + "]"; + return "[" + storedTabletFile.toMinimalString() + ", " + dataFileValue + "]"; } } diff --git a/core/src/main/java/org/apache/accumulo/core/metadata/TabletFile.java b/core/src/main/java/org/apache/accumulo/core/metadata/TabletFile.java index b31b6926d3e..8b90bd78803 100644 --- a/core/src/main/java/org/apache/accumulo/core/metadata/TabletFile.java +++ b/core/src/main/java/org/apache/accumulo/core/metadata/TabletFile.java @@ -52,4 +52,9 @@ public interface TabletFile { * */ boolean hasRange(); + + /** + * @return a string with the filename and row range if there is one. + */ + String toMinimalString(); } diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/CompactableImpl.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/CompactableImpl.java index 2d7da09b468..8fe91dab5be 100644 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/CompactableImpl.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/CompactableImpl.java @@ -202,7 +202,7 @@ public FileManager(KeyExtent extent, Collection extCompactingF this.selectStatus = FileSelectionStatus.RESERVED; log.debug("Selected compaction status initialized from external compactions {} {} {} {}", - getExtent(), selectStatus, initiallySelectedAll, asFileNames(selectedFiles)); + getExtent(), selectStatus, initiallySelectedAll, asMinimalString(selectedFiles)); } } @@ -261,7 +261,7 @@ void finishSelection(Set selected, boolean allSelected) { selectedFiles.addAll(selected); initiallySelectedAll = allSelected; log.trace("Selected compaction status changed {} {} {} {}", getExtent(), selectStatus, - initiallySelectedAll, asFileNames(selectedFiles)); + initiallySelectedAll, asMinimalString(selectedFiles)); TabletLogger.selected(getExtent(), selectKind, selectedFiles); } @@ -407,7 +407,8 @@ && getNanoTime() - selectedTimeNanos > selectedExpirationDuration.toNanos() if (selectKind == job.getKind()) { if (!selectedFiles.containsAll(jobFiles)) { log.trace("Ignoring {} compaction that does not contain selected files {} {} {}", - job.getKind(), getExtent(), asFileNames(selectedFiles), asFileNames(jobFiles)); + job.getKind(), getExtent(), asMinimalString(selectedFiles), + asMinimalString(jobFiles)); return false; } } else { @@ -417,7 +418,7 @@ && getNanoTime() - selectedTimeNanos > selectedExpirationDuration.toNanos() } } else if (!Collections.disjoint(selectedFiles, jobFiles)) { log.trace("Ingoing compaction that overlaps with selected files {} {} {}", getExtent(), - job.getKind(), asFileNames(Sets.intersection(selectedFiles, jobFiles))); + job.getKind(), asMinimalString(Sets.intersection(selectedFiles, jobFiles))); return false; } break; @@ -486,7 +487,7 @@ private void selectedCompactionCompleted(CompactionJob job, Set {}", getExtent(), - asFileNames(jobFiles), newFile.orElse(null)); + asMinimalString(jobFiles), newFile.orElse(null)); } else { log.debug("Canceled selected compaction completed {} but others still running ", getExtent()); @@ -874,8 +875,8 @@ private void selectFiles() { } - static Collection asFileNames(Set files) { - return Collections2.transform(files, StoredTabletFile::getFileName); + static Collection asMinimalString(Set files) { + return Collections2.transform(files, StoredTabletFile::toMinimalString); } @Override diff --git a/test/src/main/java/org/apache/accumulo/test/ComprehensiveIT.java b/test/src/main/java/org/apache/accumulo/test/ComprehensiveIT.java index 4ea8843e004..c33c9f91336 100644 --- a/test/src/main/java/org/apache/accumulo/test/ComprehensiveIT.java +++ b/test/src/main/java/org/apache/accumulo/test/ComprehensiveIT.java @@ -176,7 +176,6 @@ public void testMergeAndSplit() throws Exception { client.tableOperations().merge(table, null, null); assertEquals(Set.of(), new TreeSet<>(client.tableOperations().listSplits(table))); verifyData(client, table, AUTHORIZATIONS, expectedData); - } } From b5b48c3e389186e1229be7b8e5ab9e30d87dd52d Mon Sep 17 00:00:00 2001 From: Keith Turner Date: Tue, 24 Oct 2023 20:50:52 -0400 Subject: [PATCH 02/12] adds range to compactable file (#3883) Now that tablet files have an associated range, this information should be made available to compaction plugins. Making this information available to plugins enable uses cases like selecting files with ranges for compaction. This commit makes that information avialable by adding a getRange method to CompactableFile --- .../admin/compaction/CompactableFile.java | 19 +++++++++++++++++++ .../core/metadata/CompactableFileImpl.java | 11 +++++++++++ 2 files changed, 30 insertions(+) diff --git a/core/src/main/java/org/apache/accumulo/core/client/admin/compaction/CompactableFile.java b/core/src/main/java/org/apache/accumulo/core/client/admin/compaction/CompactableFile.java index baf5f04949b..36b2d7b0ea9 100644 --- a/core/src/main/java/org/apache/accumulo/core/client/admin/compaction/CompactableFile.java +++ b/core/src/main/java/org/apache/accumulo/core/client/admin/compaction/CompactableFile.java @@ -20,6 +20,7 @@ import java.net.URI; +import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.metadata.CompactableFileImpl; /** @@ -33,6 +34,16 @@ public interface CompactableFile { public URI getUri(); + /** + * @return A range associated with the file. If a file has an associated range then Accumulo will + * limit reads to within the range. Not all files have an associated range, it a file does + * not have a range then an infinite range is returned. The URI plus this range uniquely + * identify a file. + * + * @since 3.1.0 + */ + public Range getRange(); + public long getEstimatedSize(); public long getEstimatedEntries(); @@ -41,4 +52,12 @@ static CompactableFile create(URI uri, long estimatedSize, long estimatedEntries return new CompactableFileImpl(uri, estimatedSize, estimatedEntries); } + /** + * Creates a new CompactableFile object that implements this interface. + * + * @since 3.1.0 + */ + static CompactableFile create(URI uri, Range range, long estimatedSize, long estimatedEntries) { + return new CompactableFileImpl(uri, range, estimatedSize, estimatedEntries); + } } diff --git a/core/src/main/java/org/apache/accumulo/core/metadata/CompactableFileImpl.java b/core/src/main/java/org/apache/accumulo/core/metadata/CompactableFileImpl.java index e3ff1a334ae..b25e83dde31 100644 --- a/core/src/main/java/org/apache/accumulo/core/metadata/CompactableFileImpl.java +++ b/core/src/main/java/org/apache/accumulo/core/metadata/CompactableFileImpl.java @@ -22,6 +22,7 @@ import java.util.Objects; import org.apache.accumulo.core.client.admin.compaction.CompactableFile; +import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.metadata.schema.DataFileValue; public class CompactableFileImpl implements CompactableFile { @@ -34,6 +35,11 @@ public CompactableFileImpl(URI uri, long size, long entries) { this.dataFileValue = new DataFileValue(size, entries); } + public CompactableFileImpl(URI uri, Range range, long size, long entries) { + this.storedTabletFile = StoredTabletFile.of(uri, range); + this.dataFileValue = new DataFileValue(size, entries); + } + public CompactableFileImpl(StoredTabletFile storedTabletFile, DataFileValue dataFileValue) { this.storedTabletFile = Objects.requireNonNull(storedTabletFile); this.dataFileValue = Objects.requireNonNull(dataFileValue); @@ -44,6 +50,11 @@ public URI getUri() { return storedTabletFile.getPath().toUri(); } + @Override + public Range getRange() { + return storedTabletFile.getRange(); + } + @Override public String getFileName() { return storedTabletFile.getFileName(); From 759b5d55542ee4e12870ea5484a15eea132a97aa Mon Sep 17 00:00:00 2001 From: Keith Turner Date: Thu, 26 Oct 2023 19:44:23 -0400 Subject: [PATCH 03/12] fixes bug writing to root tablet (#3897) When the root tablet had no location, writes to it were failing. --- .../apache/accumulo/core/clientImpl/RootClientTabletCache.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/org/apache/accumulo/core/clientImpl/RootClientTabletCache.java b/core/src/main/java/org/apache/accumulo/core/clientImpl/RootClientTabletCache.java index f86d90b6f85..65090740171 100644 --- a/core/src/main/java/org/apache/accumulo/core/clientImpl/RootClientTabletCache.java +++ b/core/src/main/java/org/apache/accumulo/core/clientImpl/RootClientTabletCache.java @@ -58,7 +58,7 @@ public class RootClientTabletCache extends ClientTabletCache { public void binMutations(ClientContext context, List mutations, Map> binnedMutations, List failures) { CachedTablet rootCachedTablet = getRootTabletLocation(context); - if (rootCachedTablet != null) { + if (rootCachedTablet != null && rootCachedTablet.getTserverLocation().isPresent()) { var tsm = new TabletServerMutations(rootCachedTablet.getTserverSession().orElseThrow()); for (T mutation : mutations) { tsm.addMutation(RootTable.EXTENT, mutation); From 75e959af263d8d38dbb1d077e4206bd448f81e28 Mon Sep 17 00:00:00 2001 From: EdColeman Date: Thu, 26 Oct 2023 21:24:12 -0400 Subject: [PATCH 04/12] Update jline to 3.24.0 (#3895) * Update jline version to 3.24.0 * Remove outdated comment This fixes #3485 --------- Co-authored-by: Ed Coleman Co-authored-by: Christopher Tubbs --- pom.xml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 48665118572..87cdf9c4147 100644 --- a/pom.xml +++ b/pom.xml @@ -629,10 +629,9 @@ 2.2.1.Final - org.jline jline - 3.21.0 + 3.24.0 org.latencyutils From 60f46e1abd8ea53ca7f87076c2b62dace4bd0953 Mon Sep 17 00:00:00 2001 From: EdColeman Date: Fri, 27 Oct 2023 09:12:37 -0400 Subject: [PATCH 05/12] minor QA fixes (#3894) --- .../accumulo/tserver/TabletClientHandler.java | 15 +++++++-------- .../tserver/session/SessionManager.java | 17 +++++------------ 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletClientHandler.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletClientHandler.java index ec6e417bcee..4256064e8e9 100644 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletClientHandler.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletClientHandler.java @@ -142,7 +142,7 @@ public class TabletClientHandler implements TabletServerClientService.Iface, private static final Logger log = LoggerFactory.getLogger(TabletClientHandler.class); private final long MAX_TIME_TO_WAIT_FOR_SCAN_RESULT_MILLIS; - private static final long RECENTLY_SPLIT_MILLIES = MINUTES.toMillis(1); + private static final long RECENTLY_SPLIT_MILLIS = MINUTES.toMillis(1); private final TabletServer server; protected final TransactionWatcher watcher; protected final ServerContext context; @@ -263,7 +263,6 @@ private void setUpdateTablet(UpdateSession us, KeyExtent keyExtent) { us.currentTablet = null; us.authFailures.put(keyExtent, SecurityErrorCode.TABLE_DOESNT_EXIST); server.updateMetrics.addUnknownTabletErrors(0); - return; } catch (ThriftSecurityException e) { log.error("Denying permission to check user " + us.getUser() + " with user " + e.getUser(), e); @@ -272,7 +271,6 @@ private void setUpdateTablet(UpdateSession us, KeyExtent keyExtent) { us.currentTablet = null; us.authFailures.put(keyExtent, e.getCode()); server.updateMetrics.addPermissionErrors(0); - return; } } @@ -539,7 +537,8 @@ public UpdateErrors closeUpdate(TInfo tinfo, long updateID) throws NoSuchScanIDE us.authFailures.entrySet().stream() .collect(Collectors.toMap(e -> e.getKey().toThrift(), Entry::getValue))); } finally { - // Atomically unreserve and delete the session. If there any write stragglers, they will fail + // Atomically unreserve and delete the session. If there are any write stragglers, they will + // fail // after this point. server.sessionManager.removeSession(updateID, true); } @@ -761,7 +760,7 @@ public TConditionalSession startConditionalUpdate(TInfo tinfo, TCredentials cred String classLoaderContext) throws ThriftSecurityException, TException { TableId tableId = TableId.of(tableIdStr); - Authorizations userauths = null; + Authorizations userauths; NamespaceId namespaceId = getNamespaceId(credentials, tableId); if (!security.canConditionallyUpdate(credentials, tableId, namespaceId)) { throw new ThriftSecurityException(credentials.getPrincipal(), @@ -834,7 +833,7 @@ public List conditionalUpdate(TInfo tinfo, long sessID, } catch (IOException ioe) { throw new TException(ioe); } catch (Exception e) { - log.warn("Exception returned for conditionalUpdate {}", e); + log.warn("Exception returned for conditionalUpdate. tableId: {}, opid: {}", tid, opid, e); throw e; } finally { writeTracker.finishWrite(opid); @@ -1017,7 +1016,7 @@ public void loadTablet(TInfo tinfo, TCredentials credentials, String lock, for (KeyExtent e2 : onlineOverlapping) { Tablet tablet = server.getOnlineTablet(e2); if (System.currentTimeMillis() - tablet.getSplitCreationTime() - < RECENTLY_SPLIT_MILLIES) { + < RECENTLY_SPLIT_MILLIS) { all.remove(e2); } } @@ -1283,7 +1282,7 @@ public void compactionJobFailed(TInfo tinfo, TCredentials credentials, @Override public List getActiveLogs(TInfo tinfo, TCredentials credentials) { String log = server.logger.getLogFile(); - // Might be null if there no active logger + // Might be null if there is no active logger if (log == null) { return Collections.emptyList(); } diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/session/SessionManager.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/session/SessionManager.java index a8a2cc50dcc..7ff5b70e351 100644 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/session/SessionManager.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/session/SessionManager.java @@ -70,12 +70,11 @@ public class SessionManager { private final long maxUpdateIdle; private final BlockingQueue deferredCleanupQueue = new ArrayBlockingQueue<>(5000); private final Long expiredSessionMarker = (long) -1; - private final AccumuloConfiguration aconf; private final ServerContext ctx; public SessionManager(ServerContext context) { this.ctx = context; - this.aconf = context.getConfiguration(); + final AccumuloConfiguration aconf = context.getConfiguration(); maxUpdateIdle = aconf.getTimeInMillis(Property.TSERV_UPDATE_SESSION_MAXIDLE); maxIdle = aconf.getTimeInMillis(Property.TSERV_SESSION_MAXIDLE); @@ -230,22 +229,16 @@ public boolean removeIfNotReserved(long sessionId) { return true; } - boolean removed = false; - synchronized (session) { if (session.state == State.RESERVED) { return false; } - session.state = State.REMOVED; - removed = true; } - if (removed) { - sessions.remove(sessionId); - } + sessions.remove(sessionId); - return removed; + return true; } static void cleanup(BlockingQueue deferredCleanupQueue, Session session) { @@ -351,7 +344,7 @@ public Map> getActiveScansPerTable() { Set> copiedIdleSessions = new HashSet<>(); - /** + /* * Add sessions so that get the list returned in the active scans call */ for (Session session : deferredCleanupQueue) { @@ -391,7 +384,7 @@ public List getActiveScans() { final long ct = System.currentTimeMillis(); final Set> copiedIdleSessions = new HashSet<>(); - /** + /* * Add sessions so that get the list returned in the active scans call */ for (Session session : deferredCleanupQueue) { From a2ce072e5c75dc8347b6aa07375efdac5480b015 Mon Sep 17 00:00:00 2001 From: "Christopher L. Shannon" Date: Fri, 27 Oct 2023 18:42:26 -0400 Subject: [PATCH 06/12] Make sure tablets with WALs are not merged (#3900) Add extra verification to make sure that no tablets that are merged contain any WALs to prevent losing data Co-authored-by: Keith Turner --- .../org/apache/accumulo/manager/Manager.java | 6 ++ .../accumulo/manager/TabletGroupWatcher.java | 28 +++++++- .../accumulo/manager/state/MergeStats.java | 22 +++++- .../manager/state/MergeStatsTest.java | 71 +++++++++++++++++++ .../accumulo/test/manager/MergeStateIT.java | 19 +++++ 5 files changed, 142 insertions(+), 4 deletions(-) create mode 100644 server/manager/src/test/java/org/apache/accumulo/manager/state/MergeStatsTest.java diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java index b86cba576c8..196b50d76cb 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java @@ -658,6 +658,12 @@ TabletGoalState getGoalState(TabletLocationState tls, MergeInfo mergeInfo) { return TabletGoalState.HOSTED; case WAITING_FOR_OFFLINE: + // If we have walogs we need to be HOSTED to recover + if (!tls.walogs.isEmpty()) { + return TabletGoalState.HOSTED; + } else { + return TabletGoalState.UNASSIGNED; + } case MERGING: return TabletGoalState.UNASSIGNED; } diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java index b9914bef1f8..4ffeee967da 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java @@ -72,6 +72,7 @@ import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.DataFileColumnFamily; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ExternalCompactionColumnFamily; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.FutureLocationColumnFamily; +import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.LogColumnFamily; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily; import org.apache.accumulo.core.metadata.schema.MetadataTime; @@ -698,6 +699,8 @@ private void deleteTablets(MergeInfo info) throws AccumuloException { ServerColumnFamily.TIME_COLUMN.fetch(scanner); scanner.fetchColumnFamily(DataFileColumnFamily.NAME); scanner.fetchColumnFamily(CurrentLocationColumnFamily.NAME); + scanner.fetchColumnFamily(FutureLocationColumnFamily.NAME); + scanner.fetchColumnFamily(LogColumnFamily.NAME); Set datafilesAndDirs = new TreeSet<>(); for (Entry entry : scanner) { Key key = entry.getKey(); @@ -710,9 +713,13 @@ private void deleteTablets(MergeInfo info) throws AccumuloException { } } else if (ServerColumnFamily.TIME_COLUMN.hasColumns(key)) { metadataTime = MetadataTime.parse(entry.getValue().toString()); - } else if (key.compareColumnFamily(CurrentLocationColumnFamily.NAME) == 0) { + } else if (isTabletAssigned(key)) { throw new IllegalStateException( "Tablet " + key.getRow() + " is assigned during a merge!"); + // Verify that Tablet has no WALs + } else if (key.getColumnFamily().equals(LogColumnFamily.NAME)) { + throw new IllegalStateException( + "Tablet " + key.getRow() + " has walogs during a delete!"); } else if (ServerColumnFamily.DIRECTORY_COLUMN.hasColumns(key)) { var allVolumesDirectory = new AllVolumesDirectory(extent.tableId(), entry.getValue().toString()); @@ -783,13 +790,25 @@ private void mergeMetadataRecords(MergeInfo info) throws AccumuloException { TabletColumnFamily.PREV_ROW_COLUMN.fetch(scanner); ServerColumnFamily.TIME_COLUMN.fetch(scanner); ServerColumnFamily.DIRECTORY_COLUMN.fetch(scanner); + scanner.fetchColumnFamily(DataFileColumnFamily.NAME); + scanner.fetchColumnFamily(CurrentLocationColumnFamily.NAME); + scanner.fetchColumnFamily(FutureLocationColumnFamily.NAME); + scanner.fetchColumnFamily(LogColumnFamily.NAME); Mutation m = new Mutation(stopRow); MetadataTime maxLogicalTime = null; for (Entry entry : scanner) { Key key = entry.getKey(); Value value = entry.getValue(); - if (key.getColumnFamily().equals(DataFileColumnFamily.NAME)) { + + // Verify that Tablet is offline + if (isTabletAssigned(key)) { + throw new IllegalStateException( + "Tablet " + key.getRow() + " is assigned during a merge!"); + // Verify that Tablet has no WALs + } else if (key.getColumnFamily().equals(LogColumnFamily.NAME)) { + throw new IllegalStateException("Tablet " + key.getRow() + " has walogs during a merge!"); + } else if (key.getColumnFamily().equals(DataFileColumnFamily.NAME)) { m.put(key.getColumnFamily(), key.getColumnQualifier(), value); fileCount++; } else if (TabletColumnFamily.PREV_ROW_COLUMN.hasColumns(key) @@ -894,6 +913,11 @@ private void deleteTablets(MergeInfo info, Range scanRange, BatchWriter bw, Accu bw.flush(); } + private boolean isTabletAssigned(Key key) { + return key.getColumnFamily().equals(CurrentLocationColumnFamily.NAME) + || key.getColumnFamily().equals(FutureLocationColumnFamily.NAME); + } + private KeyExtent getHighTablet(KeyExtent range) throws AccumuloException { try { AccumuloClient client = manager.getContext(); diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/state/MergeStats.java b/server/manager/src/main/java/org/apache/accumulo/manager/state/MergeStats.java index df18c869da6..5cecbfe98ba 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/state/MergeStats.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/state/MergeStats.java @@ -52,6 +52,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; + import io.opentelemetry.api.trace.Span; import io.opentelemetry.context.Scope; @@ -201,6 +204,9 @@ public MergeState nextMergeState(AccumuloClient accumuloClient, CurrentState man private boolean verifyMergeConsistency(AccumuloClient accumuloClient, CurrentState manager) throws TableNotFoundException, IOException { + // The only expected state when this method is called is WAITING_FOR_OFFLINE + verifyState(info, MergeState.WAITING_FOR_OFFLINE); + MergeStats verify = new MergeStats(info); KeyExtent extent = info.getExtent(); Scanner scanner = accumuloClient @@ -230,8 +236,9 @@ private boolean verifyMergeConsistency(AccumuloClient accumuloClient, CurrentSta break; } - if (!tls.walogs.isEmpty() && verify.getMergeInfo().needsToBeChopped(tls.extent)) { - log.debug("failing consistency: needs to be chopped {}", tls.extent); + // Verify that no WALs exist + if (!verifyWalogs(tls)) { + log.debug("failing consistency: {} has walogs {}", tls.extent, tls.walogs.size()); return false; } @@ -271,6 +278,17 @@ private boolean verifyMergeConsistency(AccumuloClient accumuloClient, CurrentSta && unassigned == verify.total; } + @VisibleForTesting + void verifyState(MergeInfo info, MergeState expectedState) { + Preconditions.checkState(info.getState() == expectedState, "Unexpected merge state %s", + info.getState()); + } + + @VisibleForTesting + boolean verifyWalogs(TabletLocationState tls) { + return tls.walogs.isEmpty(); + } + public static void main(String[] args) throws Exception { ServerUtilOpts opts = new ServerUtilOpts(); opts.parseArgs(MergeStats.class.getName(), args); diff --git a/server/manager/src/test/java/org/apache/accumulo/manager/state/MergeStatsTest.java b/server/manager/src/test/java/org/apache/accumulo/manager/state/MergeStatsTest.java new file mode 100644 index 00000000000..c9ad9763c01 --- /dev/null +++ b/server/manager/src/test/java/org/apache/accumulo/manager/state/MergeStatsTest.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.accumulo.manager.state; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.Collection; +import java.util.List; + +import org.apache.accumulo.core.data.TableId; +import org.apache.accumulo.core.dataImpl.KeyExtent; +import org.apache.accumulo.core.metadata.TabletLocationState; +import org.apache.accumulo.core.metadata.TabletLocationState.BadLocationStateException; +import org.apache.accumulo.server.manager.state.MergeInfo; +import org.apache.accumulo.server.manager.state.MergeInfo.Operation; +import org.apache.accumulo.server.manager.state.MergeState; +import org.apache.hadoop.io.Text; +import org.junit.jupiter.api.Test; + +public class MergeStatsTest { + + @Test + public void testVerifyState() { + KeyExtent keyExtent = new KeyExtent(TableId.of("table"), new Text("end"), new Text("begin")); + MergeInfo mergeInfo = new MergeInfo(keyExtent, Operation.MERGE); + MergeStats stats = new MergeStats(mergeInfo); + mergeInfo.setState(MergeState.WAITING_FOR_OFFLINE); + + // Verify WAITING_FOR_OFFLINE does not throw an exception + stats.verifyState(mergeInfo, MergeState.WAITING_FOR_OFFLINE); + + // State is wrong so should throw exception + mergeInfo.setState(MergeState.WAITING_FOR_CHOPPED); + assertThrows(IllegalStateException.class, + () -> stats.verifyState(mergeInfo, MergeState.WAITING_FOR_OFFLINE)); + } + + @Test + public void testVerifyWalogs() throws BadLocationStateException { + KeyExtent keyExtent = new KeyExtent(TableId.of("table"), new Text("end"), new Text("begin")); + MergeStats stats = new MergeStats(new MergeInfo(keyExtent, Operation.MERGE)); + + // Verify that if there are Walogs the return true, else false + assertTrue(stats.verifyWalogs(getState(keyExtent, List.of()))); + assertFalse(stats.verifyWalogs(getState(keyExtent, List.of(List.of("log1"))))); + } + + private TabletLocationState getState(KeyExtent keyExtent, Collection> walogs) + throws BadLocationStateException { + return new TabletLocationState(keyExtent, null, null, null, null, walogs, true); + } + +} diff --git a/test/src/main/java/org/apache/accumulo/test/manager/MergeStateIT.java b/test/src/main/java/org/apache/accumulo/test/manager/MergeStateIT.java index e36c8b8dd8a..21173c3c5e8 100644 --- a/test/src/main/java/org/apache/accumulo/test/manager/MergeStateIT.java +++ b/test/src/main/java/org/apache/accumulo/test/manager/MergeStateIT.java @@ -46,6 +46,7 @@ import org.apache.accumulo.core.metadata.schema.TabletMetadata.Location; import org.apache.accumulo.core.security.Authorizations; import org.apache.accumulo.core.security.TablePermission; +import org.apache.accumulo.core.tabletserver.log.LogEntry; import org.apache.accumulo.core.util.HostAndPort; import org.apache.accumulo.manager.state.MergeStats; import org.apache.accumulo.server.ServerContext; @@ -207,6 +208,24 @@ public void test() throws Exception { metaDataStateStore.unassign(Collections.singletonList(new TabletLocationState(tablet, null, Location.current(state.someTServer), null, null, walogs, false)), null); + // Add a walog which should keep the state from transitioning to MERGING + KeyExtent ke = new KeyExtent(tableId, new Text("t"), new Text("p")); + m = new Mutation(ke.toMetaRow()); + LogEntry logEntry = new LogEntry(ke, 100, "f1"); + m.at().family(logEntry.getColumnFamily()).qualifier(logEntry.getColumnQualifier()) + .timestamp(logEntry.timestamp).put(logEntry.getValue()); + update(accumuloClient, m); + + // Verify state is still WAITING_FOR_OFFLINE + stats = scan(state, metaDataStateStore); + newState = stats.nextMergeState(accumuloClient, state); + assertEquals(MergeState.WAITING_FOR_OFFLINE, newState); + + // Delete the walog which will now allow a transition to MERGING + m = new Mutation(ke.toMetaRow()); + m.putDelete(logEntry.getColumnFamily(), logEntry.getColumnQualifier(), logEntry.timestamp); + update(accumuloClient, m); + // now we can split stats = scan(state, metaDataStateStore); assertEquals(MergeState.MERGING, stats.nextMergeState(accumuloClient, state)); From 3564225e16175e98034d3ae22f158bf2a94fea04 Mon Sep 17 00:00:00 2001 From: Dave Marion Date: Mon, 30 Oct 2023 12:15:19 -0400 Subject: [PATCH 07/12] Modify MACC.adminStopAll to stop sserver and compactor (#3902) The call to Manager.shutdown does not stop the ScanServers and Compactors. --- .../accumulo/miniclusterImpl/MiniAccumuloClusterControl.java | 2 ++ .../java/org/apache/accumulo/test/functional/RestartIT.java | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/minicluster/src/main/java/org/apache/accumulo/miniclusterImpl/MiniAccumuloClusterControl.java b/minicluster/src/main/java/org/apache/accumulo/miniclusterImpl/MiniAccumuloClusterControl.java index de16f12e541..6d3b36026ef 100644 --- a/minicluster/src/main/java/org/apache/accumulo/miniclusterImpl/MiniAccumuloClusterControl.java +++ b/minicluster/src/main/java/org/apache/accumulo/miniclusterImpl/MiniAccumuloClusterControl.java @@ -116,6 +116,8 @@ public void adminStopAll() throws IOException { if (p.exitValue() != 0) { throw new IOException("Failed to run `accumulo admin stopAll`"); } + stopAllServers(ServerType.COMPACTOR); + stopAllServers(ServerType.SCAN_SERVER); } @Override diff --git a/test/src/main/java/org/apache/accumulo/test/functional/RestartIT.java b/test/src/main/java/org/apache/accumulo/test/functional/RestartIT.java index 3ab2b968593..5e952629945 100644 --- a/test/src/main/java/org/apache/accumulo/test/functional/RestartIT.java +++ b/test/src/main/java/org/apache/accumulo/test/functional/RestartIT.java @@ -51,7 +51,6 @@ import org.apache.hadoop.fs.RawLocalFileSystem; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -232,7 +231,6 @@ public void killedTabletServer2() throws Exception { } @Test - @Disabled // ELASTICITY_TODO public void killedTabletServerDuringShutdown() throws Exception { try (AccumuloClient c = Accumulo.newClient().from(getClientProps()).build()) { String tableName = getUniqueNames(1)[0]; From f79bf079509b6e64ac002425163df4bf191a9d28 Mon Sep 17 00:00:00 2001 From: Christopher Tubbs Date: Mon, 30 Oct 2023 13:05:28 -0400 Subject: [PATCH 08/12] Bump ZooKeeper for CVE-2023-44981 (#3899) Bump ZooKeeper to 3.8.3 to address warnings about CVE-2023-44981 (Note: this CVE affects SASL-configured server deployments of ZK, not ZK client code, like how Accumulo uses it, but this removes the warning from GitHub about critical vulnerabilities in Accumulo, and in general, Accumulo tries to develop against the latest patched version of a particular release anyway.) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 87cdf9c4147..12e73790460 100644 --- a/pom.xml +++ b/pom.xml @@ -156,7 +156,7 @@ 2.0.9 2.0.7 0.17.0 - 3.8.2 + 3.8.3 From 128a8d0ed1ef76d487259e2ac8236383e963b667 Mon Sep 17 00:00:00 2001 From: Dave Marion Date: Mon, 30 Oct 2023 13:55:15 -0400 Subject: [PATCH 09/12] Fix inconsistent view of TabletServers in Manager (#3901) Modified LiveTserverSet so that the set of tablet servers and the tablet servers resource groups are acquired atomically. The code was acquiring this information at two different times with two different lock acquisitions, which could have led to race condtions resulting differences in set and the map. Fixed some cases in the balancers that were not handling an empty or non-existent group correctly. Co-authored-by: Keith Turner --- .../balancer/AssignmentParamsImpl.java | 27 ++++-- .../core/spi/balancer/SimpleLoadBalancer.java | 4 + .../core/spi/balancer/TableLoadBalancer.java | 8 +- .../server/manager/LiveTServerSet.java | 84 ++++++++++++++----- .../server/manager/state/CurrentState.java | 3 +- .../state/TabletManagementIterator.java | 6 +- .../org/apache/accumulo/manager/Manager.java | 16 ++-- .../accumulo/manager/TabletGroupWatcher.java | 57 +++++++++---- .../TabletManagementIteratorIT.java | 11 +-- 9 files changed, 154 insertions(+), 62 deletions(-) diff --git a/core/src/main/java/org/apache/accumulo/core/manager/balancer/AssignmentParamsImpl.java b/core/src/main/java/org/apache/accumulo/core/manager/balancer/AssignmentParamsImpl.java index 7bdbf70fc1a..bf5e387facd 100644 --- a/core/src/main/java/org/apache/accumulo/core/manager/balancer/AssignmentParamsImpl.java +++ b/core/src/main/java/org/apache/accumulo/core/manager/balancer/AssignmentParamsImpl.java @@ -34,8 +34,13 @@ import org.apache.accumulo.core.spi.balancer.TabletBalancer; import org.apache.accumulo.core.spi.balancer.data.TServerStatus; import org.apache.accumulo.core.spi.balancer.data.TabletServerId; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class AssignmentParamsImpl implements TabletBalancer.AssignmentParameters { + + private static final Logger LOG = LoggerFactory.getLogger(AssignmentParamsImpl.class); + private final SortedMap currentStatus; private final Map unassigned; private final Map assignmentsOut; @@ -50,16 +55,26 @@ public static AssignmentParamsImpl fromThrift( Map unassigned, Map assignmentsOut) { SortedMap currentStatusNew = new TreeMap<>(); - currentStatus.forEach((tsi, status) -> currentStatusNew.put(new TabletServerIdImpl(tsi), - TServerStatusImpl.fromThrift(status))); - Map> tserverGroups = new HashMap<>(); - currentTServerGrouping.forEach((k, v) -> { + currentTServerGrouping.forEach((group, serversInGroup) -> { Set servers = new HashSet<>(); - v.forEach(tsi -> servers.add(TabletServerIdImpl.fromThrift(tsi))); - tserverGroups.put(k, servers); + serversInGroup.forEach(tsi -> { + TabletServerIdImpl id = TabletServerIdImpl.fromThrift(tsi); + if (currentStatus.containsKey(tsi)) { + currentStatusNew.put(id, TServerStatusImpl.fromThrift(currentStatus.get(tsi))); + servers.add(id); + } else { + LOG.debug("Dropping tserver {} from group {} as it's not in set of all servers", id, + group); + } + }); + if (!servers.isEmpty()) { + tserverGroups.put(group, servers); + } }); + LOG.debug("TServer groups for balancer assignment: {}", tserverGroups); + Map unassignedNew = new HashMap<>(); unassigned.forEach( (ke, tsi) -> unassignedNew.put(new TabletIdImpl(ke), TabletServerIdImpl.fromThrift(tsi))); diff --git a/core/src/main/java/org/apache/accumulo/core/spi/balancer/SimpleLoadBalancer.java b/core/src/main/java/org/apache/accumulo/core/spi/balancer/SimpleLoadBalancer.java index 50fcd8417fb..09189c0192a 100644 --- a/core/src/main/java/org/apache/accumulo/core/spi/balancer/SimpleLoadBalancer.java +++ b/core/src/main/java/org/apache/accumulo/core/spi/balancer/SimpleLoadBalancer.java @@ -356,6 +356,10 @@ private static TableId busiest(Map tables) { @Override public void getAssignments(AssignmentParameters params) { + if (params.currentStatus().isEmpty()) { + log.debug("No known TabletServers, skipping tablet assignment for now."); + return; + } params.unassignedTablets().forEach((tabletId, tserverId) -> params.addAssignment(tabletId, getAssignment(params.currentStatus(), tserverId))); } diff --git a/core/src/main/java/org/apache/accumulo/core/spi/balancer/TableLoadBalancer.java b/core/src/main/java/org/apache/accumulo/core/spi/balancer/TableLoadBalancer.java index 853608018f9..44cf29d1e87 100644 --- a/core/src/main/java/org/apache/accumulo/core/spi/balancer/TableLoadBalancer.java +++ b/core/src/main/java/org/apache/accumulo/core/spi/balancer/TableLoadBalancer.java @@ -149,10 +149,12 @@ private SortedMap getCurrentSetForTable( tserversInGroup.forEach(tsid -> { TServerStatus tss = allTServers.get(tsid); if (tss == null) { - throw new IllegalStateException("TabletServer " + tsid + " in " + groupNameInUse - + " TabletServer group, but not in set of all TabletServers"); + log.warn( + "Excluding TabletServer {} from group {} because TabletServerStatus is null, likely that Manager.StatusThread.updateStatus has not discovered it yet.", + tsid, groupNameInUse); + } else { + group.put(tsid, tss); } - group.put(tsid, tss); }); return group; } diff --git a/server/base/src/main/java/org/apache/accumulo/server/manager/LiveTServerSet.java b/server/base/src/main/java/org/apache/accumulo/server/manager/LiveTServerSet.java index 018b7eaf896..f5914c4a3fb 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/manager/LiveTServerSet.java +++ b/server/base/src/main/java/org/apache/accumulo/server/manager/LiveTServerSet.java @@ -22,6 +22,7 @@ import static org.apache.accumulo.core.fate.zookeeper.ZooUtil.NodeMissingPolicy.SKIP; import java.nio.ByteBuffer; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Map; @@ -60,6 +61,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.annotations.VisibleForTesting; import com.google.common.net.HostAndPort; public class LiveTServerSet implements Watcher { @@ -204,12 +206,11 @@ static class TServerInfo { } } - // The set of active tservers with locks, indexed by their name in zookeeper + // The set of active tservers with locks, indexed by their name in zookeeper. When the contents of + // this map are modified, tServersSnapshot should be set to null. private final Map current = new HashMap<>(); - // as above, indexed by TServerInstance - private final Map currentInstances = new HashMap<>(); - // as above, grouped by resource group name - private final Map> currentGroups = new HashMap<>(); + + private LiveTServersSnapshot tServersSnapshot = null; // The set of entries in zookeeper without locks, and the first time each was noticed private final Map locklessServers = new HashMap<>(); @@ -270,6 +271,9 @@ private synchronized void checkServer(final Set updates, final Set doomed, final String path, final String zPath) throws InterruptedException, KeeperException { + // invalidate the snapshot forcing it to be recomputed the next time its requested + tServersSnapshot = null; + TServerInfo info = current.get(zPath); final var zLockPath = ServiceLock.path(path + "/" + zPath); @@ -280,8 +284,6 @@ private synchronized void checkServer(final Set updates, if (info != null) { doomed.add(info.instance); current.remove(zPath); - currentInstances.remove(info.instance); - currentGroups.get(info.resourceGroup).remove(info.instance); } Long firstSeen = locklessServers.get(zPath); @@ -302,18 +304,12 @@ private synchronized void checkServer(final Set updates, TServerInfo tServerInfo = new TServerInfo(instance, new TServerConnection(address), resourceGroup); current.put(zPath, tServerInfo); - currentInstances.put(instance, tServerInfo); - currentGroups.computeIfAbsent(resourceGroup, rg -> new HashSet<>()).add(instance); } else if (!info.instance.equals(instance)) { doomed.add(info.instance); updates.add(instance); TServerInfo tServerInfo = new TServerInfo(instance, new TServerConnection(address), resourceGroup); current.put(zPath, tServerInfo); - currentInstances.remove(info.instance); - currentGroups.getOrDefault(resourceGroup, new HashSet<>()).remove(instance); - currentInstances.put(instance, tServerInfo); - currentGroups.computeIfAbsent(resourceGroup, rg -> new HashSet<>()).add(instance); } } } @@ -356,21 +352,64 @@ public synchronized TServerConnection getConnection(TServerInstance server) { if (server == null) { return null; } - TServerInfo tServerInfo = currentInstances.get(server); + TServerInfo tServerInfo = getSnapshot().tserversInfo.get(server); if (tServerInfo == null) { return null; } return tServerInfo.connection; } - public synchronized Set getCurrentServers() { - return new HashSet<>(currentInstances.keySet()); + public static class LiveTServersSnapshot { + private final Set tservers; + private final Map> tserverGroups; + + // TServerInfo is only for internal use, so this field is private w/o a getter. + private final Map tserversInfo; + + @VisibleForTesting + public LiveTServersSnapshot(Set currentServers, + Map> serverGroups) { + this.tserversInfo = null; + this.tservers = Set.copyOf(currentServers); + Map> copy = new HashMap<>(); + serverGroups.forEach((k, v) -> copy.put(k, Set.copyOf(v))); + this.tserverGroups = Collections.unmodifiableMap(copy); + } + + public LiveTServersSnapshot(Map currentServers, + Map> serverGroups) { + this.tserversInfo = Map.copyOf(currentServers); + this.tservers = this.tserversInfo.keySet(); + Map> copy = new HashMap<>(); + serverGroups.forEach((k, v) -> copy.put(k, Set.copyOf(v))); + this.tserverGroups = Collections.unmodifiableMap(copy); + } + + public Set getTservers() { + return tservers; + } + + public Map> getTserverGroups() { + return tserverGroups; + } + } + + public synchronized LiveTServersSnapshot getSnapshot() { + if (tServersSnapshot == null) { + HashMap tServerInstances = new HashMap<>(); + Map> tserversGroups = new HashMap<>(); + current.values().forEach(tServerInfo -> { + tServerInstances.put(tServerInfo.instance, tServerInfo); + tserversGroups.computeIfAbsent(tServerInfo.resourceGroup, rg -> new HashSet<>()) + .add(tServerInfo.instance); + }); + tServersSnapshot = new LiveTServersSnapshot(tServerInstances, tserversGroups); + } + return tServersSnapshot; } - public synchronized Map> getCurrentServersGroups() { - Map> copy = new HashMap<>(); - currentGroups.forEach((k, v) -> copy.put(k, new HashSet<>(v))); - return copy; + public synchronized Set getCurrentServers() { + return getSnapshot().getTservers(); } public synchronized int size() { @@ -407,6 +446,10 @@ TServerInstance find(Map servers, String tabletServer) { } public synchronized void remove(TServerInstance server) { + + // invalidate the snapshot forcing it to be recomputed the next time its requested + tServersSnapshot = null; + String zPath = null; for (Entry entry : current.entrySet()) { if (entry.getValue().instance.equals(server)) { @@ -418,7 +461,6 @@ public synchronized void remove(TServerInstance server) { return; } current.remove(zPath); - currentInstances.remove(server); log.info("Removing zookeeper lock for {}", server); String fullpath = context.getZooKeeperRoot() + Constants.ZTSERVERS + "/" + zPath; diff --git a/server/base/src/main/java/org/apache/accumulo/server/manager/state/CurrentState.java b/server/base/src/main/java/org/apache/accumulo/server/manager/state/CurrentState.java index 09caba293ee..1b72fa2a55d 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/manager/state/CurrentState.java +++ b/server/base/src/main/java/org/apache/accumulo/server/manager/state/CurrentState.java @@ -25,6 +25,7 @@ import org.apache.accumulo.core.dataImpl.KeyExtent; import org.apache.accumulo.core.manager.thrift.ManagerState; import org.apache.accumulo.core.metadata.TServerInstance; +import org.apache.accumulo.server.manager.LiveTServerSet.LiveTServersSnapshot; public interface CurrentState { @@ -32,7 +33,7 @@ public interface CurrentState { Set onlineTabletServers(); - Map> tServerResourceGroups(); + LiveTServersSnapshot tserversSnapshot(); Set shutdownServers(); diff --git a/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementIterator.java b/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementIterator.java index 41646f41450..b44bc5d37ea 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementIterator.java +++ b/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementIterator.java @@ -76,6 +76,7 @@ import org.apache.accumulo.core.util.AddressUtil; import org.apache.accumulo.server.compaction.CompactionJobGenerator; import org.apache.accumulo.server.iterators.TabletIteratorEnvironment; +import org.apache.accumulo.server.manager.LiveTServerSet.LiveTServersSnapshot; import org.apache.accumulo.server.manager.balancer.BalancerEnvironmentImpl; import org.apache.hadoop.io.DataInputBuffer; import org.apache.hadoop.io.DataOutputBuffer; @@ -304,13 +305,14 @@ public static void configureScanner(final ScannerBase scanner, final CurrentStat IteratorSetting tabletChange = new IteratorSetting(1001, "ManagerTabletInfoIterator", TabletManagementIterator.class); if (state != null) { - TabletManagementIterator.setCurrentServers(tabletChange, state.onlineTabletServers()); + LiveTServersSnapshot tserversSnapshot = state.tserversSnapshot(); + TabletManagementIterator.setCurrentServers(tabletChange, tserversSnapshot.getTservers()); TabletManagementIterator.setOnlineTables(tabletChange, state.onlineTables()); TabletManagementIterator.setMigrations(tabletChange, state.migrationsSnapshot()); TabletManagementIterator.setManagerState(tabletChange, state.getManagerState()); TabletManagementIterator.setShuttingDown(tabletChange, state.shutdownServers()); TabletManagementIterator.setTServerResourceGroups(tabletChange, - state.tServerResourceGroups()); + tserversSnapshot.getTserverGroups()); setCompactionHints(tabletChange, state.getCompactionHints()); } scanner.addScanIterator(tabletChange); diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java index 5257a610a3c..b5de7196d0d 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java @@ -129,6 +129,7 @@ import org.apache.accumulo.server.compaction.CompactionConfigStorage; import org.apache.accumulo.server.fs.VolumeManager; import org.apache.accumulo.server.manager.LiveTServerSet; +import org.apache.accumulo.server.manager.LiveTServerSet.LiveTServersSnapshot; import org.apache.accumulo.server.manager.LiveTServerSet.TServerConnection; import org.apache.accumulo.server.manager.balancer.BalancerEnvironmentImpl; import org.apache.accumulo.server.manager.state.CurrentState; @@ -819,12 +820,11 @@ public void run() { } private long updateStatus() { - Set currentServers = tserverSet.getCurrentServers(); + var tseversSnapshot = tserverSet.getSnapshot(); TreeMap temp = new TreeMap<>(); - tserverStatus = gatherTableInformation(currentServers, temp); + tserverStatus = gatherTableInformation(tseversSnapshot.getTservers(), temp); tserverStatusForBalancer = Collections.unmodifiableSortedMap(temp); - tServerGroupingForBalancer = - Collections.unmodifiableMap(tserverSet.getCurrentServersGroups()); + tServerGroupingForBalancer = tseversSnapshot.getTserverGroups(); checkForHeldServer(tserverStatus); if (!badServers.isEmpty()) { @@ -838,7 +838,7 @@ private long updateStatus() { log.debug("not balancing while shutting down servers {}", serversToShutdown); } else { for (TabletGroupWatcher tgw : watchers) { - if (!tgw.isSameTserversAsLastScan(currentServers)) { + if (!tgw.isSameTserversAsLastScan(tseversSnapshot.getTservers())) { log.debug("not balancing just yet, as collection of live tservers is in flux"); return DEFAULT_WAIT_FOR_WATCHER; } @@ -1606,12 +1606,12 @@ public Set onlineTables() { @Override public Set onlineTabletServers() { - return tserverSet.getCurrentServers(); + return tserverSet.getSnapshot().getTservers(); } @Override - public Map> tServerResourceGroups() { - return tserverSet.getCurrentServersGroups(); + public LiveTServersSnapshot tserversSnapshot() { + return tserverSet.getSnapshot(); } // recovers state from the persistent transaction to shutdown a server diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java index cdb6953e8aa..e0d0e5db7e3 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java @@ -26,6 +26,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -81,6 +82,7 @@ import org.apache.accumulo.server.conf.TableConfiguration; import org.apache.accumulo.server.log.WalStateManager; import org.apache.accumulo.server.log.WalStateManager.WalMarkerException; +import org.apache.accumulo.server.manager.LiveTServerSet; import org.apache.accumulo.server.manager.LiveTServerSet.TServerConnection; import org.apache.accumulo.server.manager.state.Assignment; import org.apache.accumulo.server.manager.state.ClosableIterator; @@ -173,10 +175,30 @@ private static class TabletLists { public TabletLists(Manager m, SortedMap curTServers, Map> grouping) { - var destinationsMod = new TreeMap<>(curTServers); - destinationsMod.keySet().removeAll(m.serversToShutdown); - this.destinations = Collections.unmodifiableSortedMap(destinationsMod); - this.currentTServerGrouping = grouping; + synchronized (m.serversToShutdown) { + var destinationsMod = new TreeMap<>(curTServers); + if (!m.serversToShutdown.isEmpty()) { + // Remove servers that are in the process of shutting down from the lists of tablet + // servers. + destinationsMod.keySet().removeAll(m.serversToShutdown); + HashMap> groupingCopy = new HashMap<>(); + grouping.forEach((group, groupsServers) -> { + if (Collections.disjoint(groupsServers, m.serversToShutdown)) { + groupingCopy.put(group, groupsServers); + } else { + var serversCopy = new HashSet<>(groupsServers); + serversCopy.removeAll(m.serversToShutdown); + groupingCopy.put(group, Collections.unmodifiableSet(serversCopy)); + } + }); + + this.currentTServerGrouping = Collections.unmodifiableMap(groupingCopy); + } else { + this.currentTServerGrouping = grouping; + } + + this.destinations = Collections.unmodifiableSortedMap(destinationsMod); + } } public void reset() { @@ -218,7 +240,9 @@ public void run() { continue; } - var currentTservers = getCurrentTservers(); + LiveTServerSet.LiveTServersSnapshot tservers = manager.tserverSet.getSnapshot(); + var currentTservers = getTserversStatus(tservers.getTservers()); + if (currentTservers.isEmpty()) { setNeedsFullScan(); continue; @@ -226,7 +250,7 @@ public void run() { try (var iter = store.iterator(ranges)) { long t1 = System.currentTimeMillis(); - manageTablets(iter, currentTservers, false); + manageTablets(iter, currentTservers, tservers.getTserverGroups(), false); long t2 = System.currentTimeMillis(); Manager.log.debug(String.format("[%s]: partial scan time %.2f seconds for %,d ranges", store.name(), (t2 - t1) / 1000., ranges.size())); @@ -296,7 +320,8 @@ private static class TableMgmtStats { } private TableMgmtStats manageTablets(Iterator iter, - SortedMap currentTServers, boolean isFullScan) + SortedMap currentTServers, + Map> tserverGroups, boolean isFullScan) throws BadLocationStateException, TException, DistributedStoreException, WalMarkerException, IOException { @@ -312,16 +337,13 @@ private TableMgmtStats manageTablets(Iterator iter, int unloaded = 0; - final Map> currentTServerGrouping = - manager.tserverSet.getCurrentServersGroups(); - - TabletLists tLists = new TabletLists(manager, currentTServers, currentTServerGrouping); + TabletLists tLists = new TabletLists(manager, currentTServers, tserverGroups); CompactionJobGenerator compactionGenerator = new CompactionJobGenerator( new ServiceEnvironmentImpl(manager.getContext()), manager.getCompactionHints()); final Map resourceGroups = new HashMap<>(); - manager.tServerResourceGroups().forEach((group, tservers) -> { + tserverGroups.forEach((group, tservers) -> { tservers.stream().map(TabletServerIdImpl::new) .forEach(tabletServerId -> resourceGroups.put(tabletServerId, group)); }); @@ -541,10 +563,11 @@ private TableMgmtStats manageTablets(Iterator iter, return tableMgmtStats; } - private SortedMap getCurrentTservers() { + private SortedMap + getTserversStatus(Set currentServers) { // Get the current status for the current list of tservers final SortedMap currentTServers = new TreeMap<>(); - for (TServerInstance entry : manager.tserverSet.getCurrentServers()) { + for (TServerInstance entry : currentServers) { currentTServers.put(entry, manager.tserverStatus.get(entry)); } return currentTServers; @@ -563,7 +586,8 @@ public void run() { final long waitTimeBetweenScans = manager.getConfiguration() .getTimeInMillis(Property.MANAGER_TABLET_GROUP_WATCHER_INTERVAL); - var currentTServers = getCurrentTservers(); + LiveTServerSet.LiveTServersSnapshot tservers = manager.tserverSet.getSnapshot(); + var currentTServers = getTserversStatus(tservers.getTservers()); ClosableIterator iter = null; try { @@ -584,7 +608,8 @@ public void run() { eventHandler.clearNeedsFullScan(); iter = store.iterator(); - var tabletMgmtStats = manageTablets(iter, currentTServers, true); + var tabletMgmtStats = + manageTablets(iter, currentTServers, tservers.getTserverGroups(), true); // provide stats after flushing changes to avoid race conditions w/ delete table stats.end(managerState); diff --git a/test/src/main/java/org/apache/accumulo/test/functional/TabletManagementIteratorIT.java b/test/src/main/java/org/apache/accumulo/test/functional/TabletManagementIteratorIT.java index 9c3c2a37f84..3b1a688ca8d 100644 --- a/test/src/main/java/org/apache/accumulo/test/functional/TabletManagementIteratorIT.java +++ b/test/src/main/java/org/apache/accumulo/test/functional/TabletManagementIteratorIT.java @@ -71,6 +71,7 @@ import org.apache.accumulo.core.security.Authorizations; import org.apache.accumulo.core.util.UtilWaitThread; import org.apache.accumulo.harness.AccumuloClusterHarness; +import org.apache.accumulo.server.manager.LiveTServerSet; import org.apache.accumulo.server.manager.state.CurrentState; import org.apache.accumulo.server.manager.state.TabletManagementIterator; import org.apache.hadoop.io.Text; @@ -372,6 +373,11 @@ public Set onlineTabletServers() { return tservers; } + @Override + public LiveTServerSet.LiveTServersSnapshot tserversSnapshot() { + return new LiveTServerSet.LiveTServersSnapshot(onlineTabletServers(), new HashMap<>()); + } + @Override public Set onlineTables() { Set onlineTables = context.getTableIdToNameMap().keySet(); @@ -380,11 +386,6 @@ public Set onlineTables() { return this.onlineTables; } - @Override - public Map> tServerResourceGroups() { - return new HashMap<>(); - } - @Override public Set migrationsSnapshot() { return Collections.emptySet(); From e8688474b2e6eeda739fb67da1b6c1fc9cf9fa6b Mon Sep 17 00:00:00 2001 From: "Christopher L. Shannon" Date: Mon, 30 Oct 2023 17:55:36 -0400 Subject: [PATCH 10/12] Prevent merge/delete operations when wals exist (#3903) This commit updates the merge and delete rows FATE ops to verify that if tablets being processed do not have any wals This closes #3845 Co-authored-by: Keith Turner --- .../manager/state/TabletManagementIterator.java | 7 +++++++ .../apache/accumulo/manager/TabletGroupWatcher.java | 10 +++++++++- .../accumulo/manager/tableOps/merge/DeleteRows.java | 3 ++- .../manager/tableOps/merge/MergeTablets.java | 6 +++++- .../manager/tableOps/merge/ReserveTablets.java | 12 ++++++++---- 5 files changed, 31 insertions(+), 7 deletions(-) diff --git a/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementIterator.java b/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementIterator.java index b44bc5d37ea..76ac7576a07 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementIterator.java +++ b/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementIterator.java @@ -69,6 +69,7 @@ import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.SuspendLocationColumn; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily; import org.apache.accumulo.core.metadata.schema.TabletMetadata; +import org.apache.accumulo.core.metadata.schema.TabletOperationType; import org.apache.accumulo.core.spi.balancer.SimpleLoadBalancer; import org.apache.accumulo.core.spi.balancer.TabletBalancer; import org.apache.accumulo.core.spi.balancer.data.TabletServerId; @@ -280,6 +281,12 @@ private boolean shouldReturnDueToLocation(final TabletMetadata tm, || (tm.getHostingGoal() == TabletHostingGoal.ONDEMAND && tm.getHostingRequested()))) { return true; } + // If the Tablet has walogs and operation id then need to return so + // TGW can bring online to process the logs + if (!tm.getLogs().isEmpty() && tm.getOperationId() != null + && tm.getOperationId().getType() == TabletOperationType.MERGING) { + return true; + } break; default: throw new AssertionError("Inconceivable! The tablet is an unrecognized state: " + state); diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java index e0d0e5db7e3..0b7902bd46a 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java @@ -67,6 +67,7 @@ import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.FutureLocationColumnFamily; import org.apache.accumulo.core.metadata.schema.TabletMetadata; import org.apache.accumulo.core.metadata.schema.TabletMetadata.Location; +import org.apache.accumulo.core.metadata.schema.TabletOperationType; import org.apache.accumulo.core.security.Authorizations; import org.apache.accumulo.core.spi.balancer.data.TabletServerId; import org.apache.accumulo.core.util.TextUtil; @@ -418,7 +419,14 @@ private TableMgmtStats manageTablets(Iterator iter, } if (tm.getOperationId() != null) { - goal = TabletGoalState.UNASSIGNED; + // If there are still wals the tablet needs to be hosted + // to process the wals before starting the merge op + if (!tm.getLogs().isEmpty() + && tm.getOperationId().getType() == TabletOperationType.MERGING) { + goal = TabletGoalState.HOSTED; + } else { + goal = TabletGoalState.UNASSIGNED; + } } if (Manager.log.isTraceEnabled()) { diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/merge/DeleteRows.java b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/merge/DeleteRows.java index b67417f3216..6413e83c145 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/merge/DeleteRows.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/merge/DeleteRows.java @@ -20,6 +20,7 @@ import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.FILES; import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.LOCATION; +import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.LOGS; import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.OPID; import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.PREV_ROW; import static org.apache.accumulo.manager.tableOps.merge.MergeTablets.validateTablet; @@ -90,7 +91,7 @@ private Optional deleteTabletFiles(Manager manager, long tid) { try ( var tabletsMetadata = manager.getContext().getAmple().readTablets() .forTable(range.tableId()).overlapping(range.prevEndRow(), range.endRow()) - .fetch(OPID, LOCATION, FILES, PREV_ROW).checkConsistency().build(); + .fetch(OPID, LOCATION, FILES, PREV_ROW, LOGS).checkConsistency().build(); var tabletsMutator = manager.getContext().getAmple().conditionallyMutateTablets()) { KeyExtent firstCompleteContained = null; diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/merge/MergeTablets.java b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/merge/MergeTablets.java index 8d5dbc8d7c7..ddb1616f806 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/merge/MergeTablets.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/merge/MergeTablets.java @@ -23,6 +23,7 @@ import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.FILES; import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.HOSTING_GOAL; import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.LOCATION; +import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.LOGS; import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.OPID; import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.PREV_ROW; import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.TIME; @@ -95,7 +96,7 @@ private void mergeMetadataRecords(Manager manager, long tid) throws AccumuloExce try (var tabletsMetadata = manager.getContext().getAmple().readTablets() .forTable(range.tableId()).overlapping(range.prevEndRow(), range.endRow()) - .fetch(OPID, LOCATION, HOSTING_GOAL, FILES, TIME, DIR, ECOMP, PREV_ROW).build()) { + .fetch(OPID, LOCATION, HOSTING_GOAL, FILES, TIME, DIR, ECOMP, PREV_ROW, LOGS).build()) { int tabletsSeen = 0; @@ -233,6 +234,9 @@ static void validateTablet(TabletMetadata tabletMeta, String fateStr, TabletOper Preconditions.checkState(expectedTableId.equals(tabletMeta.getTableId()), "%s tablet %s has unexpected table id %s expected %s", fateStr, tabletMeta.getExtent(), tabletMeta.getTableId(), expectedTableId); + Preconditions.checkState(tabletMeta.getLogs().isEmpty(), + "%s merging tablet %s has unexpected walogs %s", fateStr, tabletMeta.getExtent(), + tabletMeta.getLogs().size()); } /** diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/merge/ReserveTablets.java b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/merge/ReserveTablets.java index 04a5511b689..87f319f6337 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/merge/ReserveTablets.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/merge/ReserveTablets.java @@ -19,6 +19,7 @@ package org.apache.accumulo.manager.tableOps.merge; import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.LOCATION; +import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.LOGS; import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.OPID; import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.PREV_ROW; @@ -54,7 +55,7 @@ public long isReady(long tid, Manager env) throws Exception { try ( var tablets = env.getContext().getAmple().readTablets().forTable(data.tableId) - .overlapping(range.prevEndRow(), range.endRow()).fetch(PREV_ROW, LOCATION, OPID) + .overlapping(range.prevEndRow(), range.endRow()).fetch(PREV_ROW, LOCATION, LOGS, OPID) .checkConsistency().build(); var tabletsMutator = env.getContext().getAmple().conditionallyMutateTablets();) { @@ -62,6 +63,7 @@ public long isReady(long tid, Manager env) throws Exception { int otherOps = 0; int opsSet = 0; int locations = 0; + int wals = 0; for (var tabletMeta : tablets) { @@ -77,6 +79,8 @@ public long isReady(long tid, Manager env) throws Exception { locations++; } + wals += tabletMeta.getLogs().size(); + count++; } @@ -84,8 +88,8 @@ public long isReady(long tid, Manager env) throws Exception { .filter(conditionalResult -> conditionalResult.getStatus() == Status.ACCEPTED).count(); log.debug( - "{} reserve tablets op:{} count:{} other opids:{} opids set:{} locations:{} accepted:{}", - FateTxId.formatTid(tid), data.op, count, otherOps, opsSet, locations, opsAccepted); + "{} reserve tablets op:{} count:{} other opids:{} opids set:{} locations:{} accepted:{} wals:{}", + FateTxId.formatTid(tid), data.op, count, otherOps, opsSet, locations, opsAccepted, wals); // while there are table lock a tablet can be concurrently deleted, so should always see // tablets @@ -98,7 +102,7 @@ public long isReady(long tid, Manager env) throws Exception { FateTxId.formatTid(tid)); } - if (locations > 0 || otherOps > 0) { + if (locations > 0 || otherOps > 0 || wals > 0) { // need to wait on these tablets return Math.max(1000, count); } From 5c454e12a144e3543175d2cbc0b54bc0900749b1 Mon Sep 17 00:00:00 2001 From: Keith Turner Date: Tue, 31 Oct 2023 11:00:52 -0400 Subject: [PATCH 11/12] Refactors tablet mgmt iterator to share decision code w/ TGW (#3904) The TabletGroupWatcher uses the TabletManagementIterator to filter tablets that need attention. The TabletMgmtIter had its own custom code to make decision about tablets. This commit modifies the TabletMgmtIterator to use the same code as the TGW when making decisions about tablets. This makes it easier to reason about and change the behavior of the TGW. In making the TGW and TabletMgmtIter share code a change was also made to how they get information to make decisions. A new class called TabletManagementParameters was introduced that contains an immutable snapshot of the information that both classes need to make decisions. With this change the iterator and TGW are using the same information for a pass over the metadata table, in the past the information was obtained at different times and could have been different due to race conditions. These race conditions were probably not harmful, but removing them makes the code easier to reason about. Also move some code outside of TGW away from using the TableMgmtIterator. Need to move all code outside of TGW away from using this iterator in order to make the code easier to maintain. Left TODOs in the code about this and will open follow on issues. This commit also changes how shutting down servers are handled. Each TGW gets its servers to shutdown from its dependent TGW. If the TGW has no dependent it gets it from the manager. This allows the set of servers to shutdown to percolate through the TGWs, each filtering out any active servers it sees at its level. So for example the USER TGW will only make a server to shutdown available to the METADATA TGW when it has not seen it in a full scan. --- .../manager/balancer/TabletServerIdImpl.java | 4 + .../accumulo/core/metadata/TabletState.java | 49 +--- .../server/manager/state/CurrentState.java | 53 ---- .../state/LoggingTabletStateStore.java | 5 +- .../manager/state/MetaDataStateStore.java | 17 +- .../manager/state/RootTabletStateStore.java | 12 +- .../server/manager/state/TabletGoalState.java | 184 ++++++++++++ .../state/TabletManagementIterator.java | 263 ++---------------- .../state/TabletManagementParameters.java | 252 +++++++++++++++++ .../state/TabletManagementScanner.java | 11 +- .../manager/state/TabletStateStore.java | 19 +- .../manager/state/ZooTabletStateStore.java | 6 +- .../server/util/FindOfflineTablets.java | 37 +-- .../util/ListOnlineOnDemandTablets.java | 25 +- .../gc/GarbageCollectWriteAheadLogs.java | 29 +- .../gc/GarbageCollectWriteAheadLogsTest.java | 20 +- .../org/apache/accumulo/manager/Manager.java | 138 +-------- .../accumulo/manager/TabletGroupWatcher.java | 205 +++++++------- .../manager/tableOps/delete/CleanUp.java | 37 ++- .../manager/tableOps/merge/DeleteRows.java | 1 - .../manager/upgrade/UpgradeCoordinator.java | 18 +- server/monitor/pom.xml | 4 - .../monitor/rest/tables/TablesResource.java | 34 +-- .../org/apache/accumulo/test/LocatorIT.java | 27 +- .../test/ManagerRepairsDualAssignmentIT.java | 50 ++-- .../test/functional/AssignLocationModeIT.java | 11 +- .../functional/CompactLocationModeIT.java | 14 +- .../test/functional/ManagerAssignmentIT.java | 47 ++-- .../TabletManagementIteratorIT.java | 117 +++----- .../test/manager/SuspendedTabletsIT.java | 24 +- .../test/performance/NullTserver.java | 7 +- 31 files changed, 812 insertions(+), 908 deletions(-) delete mode 100644 server/base/src/main/java/org/apache/accumulo/server/manager/state/CurrentState.java create mode 100644 server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletGoalState.java create mode 100644 server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementParameters.java diff --git a/core/src/main/java/org/apache/accumulo/core/manager/balancer/TabletServerIdImpl.java b/core/src/main/java/org/apache/accumulo/core/manager/balancer/TabletServerIdImpl.java index 2fefb466d33..013515a2530 100644 --- a/core/src/main/java/org/apache/accumulo/core/manager/balancer/TabletServerIdImpl.java +++ b/core/src/main/java/org/apache/accumulo/core/manager/balancer/TabletServerIdImpl.java @@ -44,6 +44,10 @@ public TabletServerIdImpl(TServerInstance tServerInstance) { this.tServerInstance = requireNonNull(tServerInstance); } + public TServerInstance getTServerInstance() { + return tServerInstance; + } + @Override public String getHost() { return tServerInstance.getHostAndPort().getHost(); diff --git a/core/src/main/java/org/apache/accumulo/core/metadata/TabletState.java b/core/src/main/java/org/apache/accumulo/core/metadata/TabletState.java index 4be39cf906c..9fcf8add3f8 100644 --- a/core/src/main/java/org/apache/accumulo/core/metadata/TabletState.java +++ b/core/src/main/java/org/apache/accumulo/core/metadata/TabletState.java @@ -18,29 +18,18 @@ */ package org.apache.accumulo.core.metadata; -import java.util.Map; import java.util.Set; -import org.apache.accumulo.core.data.TabletId; -import org.apache.accumulo.core.dataImpl.TabletIdImpl; -import org.apache.accumulo.core.manager.balancer.TabletServerIdImpl; import org.apache.accumulo.core.metadata.schema.TabletMetadata; -import org.apache.accumulo.core.spi.balancer.TabletBalancer; -import org.apache.accumulo.core.spi.balancer.data.TabletServerId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public enum TabletState { - UNASSIGNED, ASSIGNED, HOSTED, ASSIGNED_TO_DEAD_SERVER, SUSPENDED, NEEDS_REASSIGNMENT; + UNASSIGNED, ASSIGNED, HOSTED, ASSIGNED_TO_DEAD_SERVER, SUSPENDED; private static Logger log = LoggerFactory.getLogger(TabletState.class); public static TabletState compute(TabletMetadata tm, Set liveTServers) { - return compute(tm, liveTServers, null, null); - } - - public static TabletState compute(TabletMetadata tm, Set liveTServers, - TabletBalancer balancer, Map tserverGroups) { TabletMetadata.Location current = null; TabletMetadata.Location future = null; if (tm.hasCurrent()) { @@ -53,42 +42,6 @@ public static TabletState compute(TabletMetadata tm, Set liveTS : TabletState.ASSIGNED_TO_DEAD_SERVER; } else if (current != null) { if (liveTServers.contains(current.getServerInstance())) { - if (balancer != null) { - var tsii = new TabletServerIdImpl(current.getServerInstance()); - var resourceGroup = tserverGroups.get(tsii); - - if (resourceGroup != null) { - var reassign = balancer.needsReassignment(new TabletBalancer.CurrentAssignment() { - @Override - public TabletId getTablet() { - return new TabletIdImpl(tm.getExtent()); - } - - @Override - public TabletServerId getTabletServer() { - return tsii; - } - - @Override - public String getResourceGroup() { - return resourceGroup; - } - }); - - if (reassign) { - return TabletState.NEEDS_REASSIGNMENT; - } - } else { - // A tablet server should always have a resource group, however there is a race - // conditions where the resource group map was read before a tablet server came into - // existence. Another possible cause for an absent resource group is a bug in accumulo. - // In either case do not call the balancer for now with the assumption that the resource - // group will be available later. Log a message in case it is a bug. - log.trace( - "Could not find resource group for tserver {}, so did not consult balancer. Assuming this is a temporary race condition.", - current.getServerInstance()); - } - } return TabletState.HOSTED; } else { return TabletState.ASSIGNED_TO_DEAD_SERVER; diff --git a/server/base/src/main/java/org/apache/accumulo/server/manager/state/CurrentState.java b/server/base/src/main/java/org/apache/accumulo/server/manager/state/CurrentState.java deleted file mode 100644 index 1b72fa2a55d..00000000000 --- a/server/base/src/main/java/org/apache/accumulo/server/manager/state/CurrentState.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.accumulo.server.manager.state; - -import java.util.Map; -import java.util.Set; - -import org.apache.accumulo.core.data.TableId; -import org.apache.accumulo.core.dataImpl.KeyExtent; -import org.apache.accumulo.core.manager.thrift.ManagerState; -import org.apache.accumulo.core.metadata.TServerInstance; -import org.apache.accumulo.server.manager.LiveTServerSet.LiveTServersSnapshot; - -public interface CurrentState { - - Set onlineTables(); - - Set onlineTabletServers(); - - LiveTServersSnapshot tserversSnapshot(); - - Set shutdownServers(); - - /** - * Provide an immutable snapshot view of migrating tablets. Objects contained in the set may still - * be mutable. - */ - Set migrationsSnapshot(); - - ManagerState getManagerState(); - - // ELASTICITIY_TODO it would be nice if this method could take DataLevel as an argument and only - // retrieve information about compactions in that data level. Attempted this and a lot of - // refactoring was needed to get that small bit of information to this method. Would be best to - // address this after issue. May be best to attempt this after #3576. - Map> getCompactionHints(); -} diff --git a/server/base/src/main/java/org/apache/accumulo/server/manager/state/LoggingTabletStateStore.java b/server/base/src/main/java/org/apache/accumulo/server/manager/state/LoggingTabletStateStore.java index ce2b3ede023..8545ecd7c75 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/manager/state/LoggingTabletStateStore.java +++ b/server/base/src/main/java/org/apache/accumulo/server/manager/state/LoggingTabletStateStore.java @@ -55,8 +55,9 @@ public String name() { } @Override - public ClosableIterator iterator(List ranges) { - return wrapped.iterator(ranges); + public ClosableIterator iterator(List ranges, + TabletManagementParameters parameters) { + return wrapped.iterator(ranges, parameters); } @Override diff --git a/server/base/src/main/java/org/apache/accumulo/server/manager/state/MetaDataStateStore.java b/server/base/src/main/java/org/apache/accumulo/server/manager/state/MetaDataStateStore.java index 2a2ee11c095..a182745da5d 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/manager/state/MetaDataStateStore.java +++ b/server/base/src/main/java/org/apache/accumulo/server/manager/state/MetaDataStateStore.java @@ -30,26 +30,25 @@ import org.apache.accumulo.core.metadata.schema.Ample.DataLevel; import org.apache.accumulo.core.metadata.schema.TabletMetadata; +import com.google.common.base.Preconditions; + class MetaDataStateStore extends AbstractTabletStateStore implements TabletStateStore { protected final ClientContext context; - protected final CurrentState state; private final String targetTableName; private final Ample ample; private final DataLevel level; - protected MetaDataStateStore(DataLevel level, ClientContext context, CurrentState state, - String targetTableName) { + protected MetaDataStateStore(DataLevel level, ClientContext context, String targetTableName) { super(context); this.level = level; this.context = context; - this.state = state; this.ample = context.getAmple(); this.targetTableName = targetTableName; } - MetaDataStateStore(DataLevel level, ClientContext context, CurrentState state) { - this(level, context, state, MetadataTable.NAME); + MetaDataStateStore(DataLevel level, ClientContext context) { + this(level, context, MetadataTable.NAME); } @Override @@ -58,8 +57,10 @@ public DataLevel getLevel() { } @Override - public ClosableIterator iterator(List ranges) { - return new TabletManagementScanner(context, ranges, state, targetTableName); + public ClosableIterator iterator(List ranges, + TabletManagementParameters parameters) { + Preconditions.checkArgument(parameters.getLevel() == getLevel()); + return new TabletManagementScanner(context, ranges, parameters, targetTableName); } @Override diff --git a/server/base/src/main/java/org/apache/accumulo/server/manager/state/RootTabletStateStore.java b/server/base/src/main/java/org/apache/accumulo/server/manager/state/RootTabletStateStore.java index 4b574ade13a..97c9f7ec32d 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/manager/state/RootTabletStateStore.java +++ b/server/base/src/main/java/org/apache/accumulo/server/manager/state/RootTabletStateStore.java @@ -26,15 +26,19 @@ import org.apache.accumulo.core.metadata.RootTable; import org.apache.accumulo.core.metadata.schema.Ample.DataLevel; +import com.google.common.base.Preconditions; + class RootTabletStateStore extends MetaDataStateStore { - RootTabletStateStore(DataLevel level, ClientContext context, CurrentState state) { - super(level, context, state, RootTable.NAME); + RootTabletStateStore(DataLevel level, ClientContext context) { + super(level, context, RootTable.NAME); } @Override - public ClosableIterator iterator(List ranges) { - return new TabletManagementScanner(context, ranges, state, RootTable.NAME); + public ClosableIterator iterator(List ranges, + TabletManagementParameters parameters) { + Preconditions.checkArgument(parameters.getLevel() == getLevel()); + return new TabletManagementScanner(context, ranges, parameters, RootTable.NAME); } @Override diff --git a/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletGoalState.java b/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletGoalState.java new file mode 100644 index 00000000000..4dedbe0bf81 --- /dev/null +++ b/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletGoalState.java @@ -0,0 +1,184 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.accumulo.server.manager.state; + +import org.apache.accumulo.core.data.TabletId; +import org.apache.accumulo.core.dataImpl.KeyExtent; +import org.apache.accumulo.core.dataImpl.TabletIdImpl; +import org.apache.accumulo.core.manager.balancer.TabletServerIdImpl; +import org.apache.accumulo.core.metadata.TServerInstance; +import org.apache.accumulo.core.metadata.TabletState; +import org.apache.accumulo.core.metadata.schema.Ample; +import org.apache.accumulo.core.metadata.schema.TabletMetadata; +import org.apache.accumulo.core.metadata.schema.TabletOperationType; +import org.apache.accumulo.core.spi.balancer.TabletBalancer; +import org.apache.accumulo.core.spi.balancer.data.TabletServerId; +import org.apache.accumulo.core.tablet.thrift.TUnloadTabletGoal; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Preconditions; + +public enum TabletGoalState { + + HOSTED(TUnloadTabletGoal.UNKNOWN), + UNASSIGNED(TUnloadTabletGoal.UNASSIGNED), + SUSPENDED(TUnloadTabletGoal.SUSPENDED); + + private final TUnloadTabletGoal unloadGoal; + + TabletGoalState(TUnloadTabletGoal unloadGoal) { + this.unloadGoal = unloadGoal; + } + + /** The purpose of unloading this tablet. */ + public TUnloadTabletGoal howUnload() { + return unloadGoal; + } + + private static final Logger log = LoggerFactory.getLogger(TabletGoalState.class); + + public static TabletGoalState compute(TabletMetadata tm, TabletState currentState, + TabletBalancer balancer, TabletManagementParameters params) { + Preconditions.checkArgument(Ample.DataLevel.of(tm.getTableId()) == params.getLevel(), + "Tablet %s not in expected level %s", tm.getExtent(), params.getLevel()); + + // Always follow through with assignments + if (currentState == TabletState.ASSIGNED) { + return HOSTED; + } + + KeyExtent extent = tm.getExtent(); + // Shutting down? + TabletGoalState systemGoalState = getSystemGoalState(tm, params); + + if (systemGoalState == TabletGoalState.HOSTED) { + if (!params.isParentLevelUpgraded()) { + // The place where this tablet stores its metadata was not upgraded, so do not assign this + // tablet yet. + return UNASSIGNED; + } + + // When an operation id is set tablets need to be unassigned unless there are still wals. When + // there are wals the tablet needs to be hosted to recover data in them. However, deleting + // tablets do not need to recover wals. + if (tm.getOperationId() != null && (tm.getLogs().isEmpty() + || tm.getOperationId().getType() == TabletOperationType.DELETING)) { + return TabletGoalState.UNASSIGNED; + } + + if (!params.isTableOnline(tm.getTableId())) { + return UNASSIGNED; + } + + switch (tm.getHostingGoal()) { + case NEVER: + return UNASSIGNED; + case ONDEMAND: + if (!tm.getHostingRequested()) { + return UNASSIGNED; + } + } + + TServerInstance dest = params.getMigrations().get(extent); + if (dest != null && tm.hasCurrent() && !dest.equals(tm.getLocation().getServerInstance())) { + return UNASSIGNED; + } + + if (currentState == TabletState.HOSTED && balancer != null) { + // see if the balancer thinks this tablet needs to be unassigned + + Preconditions.checkArgument( + tm.getLocation().getType() == TabletMetadata.LocationType.CURRENT, + "Expected current tablet location %s %s", tm.getExtent(), tm.getLocation()); + var tsii = new TabletServerIdImpl(tm.getLocation().getServerInstance()); + + var resourceGroup = params.getResourceGroup(tm.getLocation().getServerInstance()); + + if (resourceGroup != null) { + var reassign = balancer.needsReassignment(new TabletBalancer.CurrentAssignment() { + @Override + public TabletId getTablet() { + return new TabletIdImpl(tm.getExtent()); + } + + @Override + public TabletServerId getTabletServer() { + return tsii; + } + + @Override + public String getResourceGroup() { + return resourceGroup; + } + }); + + if (reassign) { + return UNASSIGNED; + } + } else { + // ELASTICITY_TODO this log level was set to error so that this case can be examined for + // bugs. A tablet server should always have a resource group. If there are unavoidable + // race conditions for getting tablet servers and their RGs, that that should be handled + // in the TabletManagementParameters data acquisition phase so that not all code has to + // deal with it. Eventually this log level should possibly be adjusted or converted to an + // exception. + log.error( + "Could not find resource group for tserver {}, so did not consult balancer. Need to determine the cause of this.", + tm.getLocation().getServerInstance()); + } + } + + if (tm.hasCurrent() + && params.getServersToShutdown().contains(tm.getLocation().getServerInstance())) { + if (params.canSuspendTablets()) { + return SUSPENDED; + } else { + return UNASSIGNED; + } + } + } + return systemGoalState; + } + + private static TabletGoalState getSystemGoalState(TabletMetadata tm, + TabletManagementParameters params) { + switch (params.getManagerState()) { + case NORMAL: + return HOSTED; + case HAVE_LOCK: // fall-through intended + case INITIAL: // fall-through intended + case SAFE_MODE: + if (tm.getExtent().isMeta()) { + return HOSTED; + } + return TabletGoalState.UNASSIGNED; + case UNLOAD_METADATA_TABLETS: + if (tm.getExtent().isRootTablet()) { + return HOSTED; + } + return UNASSIGNED; + case UNLOAD_ROOT_TABLET: + case STOP: + return UNASSIGNED; + default: + throw new IllegalStateException("Unknown Manager State"); + } + } +} diff --git a/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementIterator.java b/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementIterator.java index 76ac7576a07..c4e3d70bdb5 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementIterator.java +++ b/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementIterator.java @@ -18,19 +18,11 @@ */ package org.apache.accumulo.server.manager.state; -import static org.apache.accumulo.core.util.LazySingletons.GSON; - import java.io.IOException; -import java.lang.reflect.Type; import java.util.ArrayList; -import java.util.Arrays; -import java.util.Base64; -import java.util.Collection; import java.util.Collections; import java.util.EnumSet; -import java.util.HashMap; import java.util.HashSet; -import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; @@ -38,24 +30,19 @@ import org.apache.accumulo.core.client.IteratorSetting; import org.apache.accumulo.core.client.ScannerBase; -import org.apache.accumulo.core.client.admin.TabletHostingGoal; import org.apache.accumulo.core.conf.AccumuloConfiguration; import org.apache.accumulo.core.conf.ConfigurationCopy; import org.apache.accumulo.core.conf.ConfigurationTypeHelper; import org.apache.accumulo.core.conf.Property; import org.apache.accumulo.core.data.Key; -import org.apache.accumulo.core.data.TableId; import org.apache.accumulo.core.data.Value; -import org.apache.accumulo.core.dataImpl.KeyExtent; import org.apache.accumulo.core.iterators.IteratorEnvironment; import org.apache.accumulo.core.iterators.SkippingIterator; import org.apache.accumulo.core.iterators.SortedKeyValueIterator; import org.apache.accumulo.core.iterators.user.WholeRowIterator; -import org.apache.accumulo.core.manager.balancer.TabletServerIdImpl; import org.apache.accumulo.core.manager.state.TabletManagement; import org.apache.accumulo.core.manager.state.TabletManagement.ManagementAction; import org.apache.accumulo.core.manager.thrift.ManagerState; -import org.apache.accumulo.core.metadata.TServerInstance; import org.apache.accumulo.core.metadata.TabletState; import org.apache.accumulo.core.metadata.schema.DataFileValue; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.CurrentLocationColumnFamily; @@ -69,24 +56,15 @@ import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.SuspendLocationColumn; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily; import org.apache.accumulo.core.metadata.schema.TabletMetadata; -import org.apache.accumulo.core.metadata.schema.TabletOperationType; import org.apache.accumulo.core.spi.balancer.SimpleLoadBalancer; import org.apache.accumulo.core.spi.balancer.TabletBalancer; -import org.apache.accumulo.core.spi.balancer.data.TabletServerId; import org.apache.accumulo.core.spi.compaction.CompactionKind; -import org.apache.accumulo.core.util.AddressUtil; import org.apache.accumulo.server.compaction.CompactionJobGenerator; import org.apache.accumulo.server.iterators.TabletIteratorEnvironment; -import org.apache.accumulo.server.manager.LiveTServerSet.LiveTServersSnapshot; import org.apache.accumulo.server.manager.balancer.BalancerEnvironmentImpl; -import org.apache.hadoop.io.DataInputBuffer; -import org.apache.hadoop.io.DataOutputBuffer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.base.Joiner; -import com.google.gson.reflect.TypeToken; - /** * Iterator used by the TabletGroupWatcher threads in the Manager. This iterator returns * TabletManagement objects for each Tablet that needs some type of action performed on it by the @@ -94,146 +72,10 @@ */ public class TabletManagementIterator extends SkippingIterator { private static final Logger LOG = LoggerFactory.getLogger(TabletManagementIterator.class); - - private static final String SERVERS_OPTION = "servers"; - private static final String TABLES_OPTION = "tables"; - private static final String MIGRATIONS_OPTION = "migrations"; - private static final String MANAGER_STATE_OPTION = "managerState"; - private static final String SHUTTING_DOWN_OPTION = "shuttingDown"; - private static final String RESOURCE_GROUPS = "resourceGroups"; - private static final String TSERVER_GROUP_PREFIX = "serverGroups_"; - private static final String COMPACTION_HINTS_OPTIONS = "compactionHints"; + private static final String TABLET_GOAL_STATE_PARAMS_OPTION = "tgsParams"; private CompactionJobGenerator compactionGenerator; private TabletBalancer balancer; - private static void setCurrentServers(final IteratorSetting cfg, - final Set goodServers) { - if (goodServers != null) { - List servers = new ArrayList<>(); - for (TServerInstance server : goodServers) { - servers.add(server.getHostPortSession()); - } - cfg.addOption(SERVERS_OPTION, Joiner.on(",").join(servers)); - } - } - - private static void setOnlineTables(final IteratorSetting cfg, final Set onlineTables) { - if (onlineTables != null) { - cfg.addOption(TABLES_OPTION, Joiner.on(",").join(onlineTables)); - } - } - - private static void setMigrations(final IteratorSetting cfg, - final Collection migrations) { - DataOutputBuffer buffer = new DataOutputBuffer(); - try { - for (KeyExtent extent : migrations) { - extent.writeTo(buffer); - } - } catch (Exception ex) { - throw new RuntimeException(ex); - } - String encoded = - Base64.getEncoder().encodeToString(Arrays.copyOf(buffer.getData(), buffer.getLength())); - cfg.addOption(MIGRATIONS_OPTION, encoded); - } - - private static void setManagerState(final IteratorSetting cfg, final ManagerState state) { - cfg.addOption(MANAGER_STATE_OPTION, state.toString()); - } - - private static void setShuttingDown(final IteratorSetting cfg, - final Set servers) { - if (servers != null) { - cfg.addOption(SHUTTING_DOWN_OPTION, Joiner.on(",").join(servers)); - } - } - - private static void setCompactionHints(final IteratorSetting cfg, - Map> allHints) { - cfg.addOption(COMPACTION_HINTS_OPTIONS, GSON.get().toJson(allHints)); - } - - private static void setTServerResourceGroups(final IteratorSetting cfg, - Map> tServerResourceGroups) { - if (tServerResourceGroups == null) { - return; - } - cfg.addOption(RESOURCE_GROUPS, Joiner.on(",").join(tServerResourceGroups.keySet())); - for (Entry> entry : tServerResourceGroups.entrySet()) { - cfg.addOption(TSERVER_GROUP_PREFIX + entry.getKey(), Joiner.on(",").join(entry.getValue())); - } - } - - private static Map parseTServerResourceGroups(Map options) { - Map resourceGroups = new HashMap<>(); - String groups = options.get(RESOURCE_GROUPS); - if (groups != null) { - for (String groupName : groups.split(",")) { - String groupServers = options.get(TSERVER_GROUP_PREFIX + groupName); - if (groupServers != null) { - Set servers = parseServers(groupServers); - servers.forEach(server -> resourceGroups.put(new TabletServerIdImpl(server), groupName)); - } - } - } - return resourceGroups; - } - - private static Set parseMigrations(final String migrations) { - Set result = new HashSet<>(); - if (migrations != null) { - try { - DataInputBuffer buffer = new DataInputBuffer(); - byte[] data = Base64.getDecoder().decode(migrations); - buffer.reset(data, data.length); - while (buffer.available() > 0) { - result.add(KeyExtent.readFrom(buffer)); - } - } catch (Exception ex) { - throw new RuntimeException(ex); - } - } - return result; - } - - private static Set parseTableIDs(final String tableIDs) { - Set result = new HashSet<>(); - if (tableIDs != null) { - for (String tableID : tableIDs.split(",")) { - result.add(TableId.of(tableID)); - } - } - return result; - } - - private static Set parseServers(final String servers) { - Set result = new HashSet<>(); - if (servers != null) { - // parse "host:port[INSTANCE]" - if (!servers.isEmpty()) { - for (String part : servers.split(",")) { - String[] parts = part.split("\\[", 2); - String hostport = parts[0]; - String instance = parts[1]; - if (instance != null && instance.endsWith("]")) { - instance = instance.substring(0, instance.length() - 1); - } - result.add(new TServerInstance(AddressUtil.parseAddress(hostport, false), instance)); - } - } - } - return result; - } - - private static Map> parseCompactionHints(String json) { - if (json == null) { - return Map.of(); - } - Type tt = new TypeToken>>() {}.getType(); - return GSON.get().fromJson(json, tt); - } - private static boolean shouldReturnDueToSplit(final TabletMetadata tm, final long splitThreshold) { final long sumOfFileSizes = @@ -244,57 +86,38 @@ private static boolean shouldReturnDueToSplit(final TabletMetadata tm, return shouldSplit; } - private boolean shouldReturnDueToLocation(final TabletMetadata tm, - final Set onlineTables, final Set current) { + private boolean shouldReturnDueToLocation(final TabletMetadata tm) { - if (migrations.contains(tm.getExtent())) { + if (tabletMgmtParams.getMigrations().containsKey(tm.getExtent())) { + // Ideally only the state and goalState would need to be used to determine if a tablet should + // be returned. However, the Manager/TGW currently needs everything in the migrating set + // returned so it can update in memory maps it has. If this were improved then this case would + // not be needed. return true; } - // is the table supposed to be online or offline? - final boolean shouldBeOnline = - onlineTables.contains(tm.getTableId()) && tm.getOperationId() == null; - - TabletState state = TabletState.compute(tm, current, balancer, tserverResourceGroups); + TabletState state = TabletState.compute(tm, tabletMgmtParams.getOnlineTsevers()); + TabletGoalState goalState = TabletGoalState.compute(tm, state, balancer, tabletMgmtParams); if (LOG.isTraceEnabled()) { - LOG.trace( - "{} is {}. Table is {}line. Tablet hosting goal is {}, hostingRequested: {}, opId: {}", - tm.getExtent(), state, (shouldBeOnline ? "on" : "off"), tm.getHostingGoal(), - tm.getHostingRequested(), tm.getOperationId()); + LOG.trace("extent:{} state:{} goalState:{} hostingGoal:{}, hostingRequested: {}, opId: {}", + tm.getExtent(), state, goalState, tm.getHostingGoal(), tm.getHostingRequested(), + tm.getOperationId()); } - switch (state) { - case ASSIGNED: - // we always want data about assigned tablets - return true; + + switch (goalState) { case HOSTED: - if (!shouldBeOnline || tm.getHostingGoal() == TabletHostingGoal.NEVER - || (tm.getHostingGoal() == TabletHostingGoal.ONDEMAND && !tm.getHostingRequested())) { - return true; - } - break; - case ASSIGNED_TO_DEAD_SERVER: - case NEEDS_REASSIGNMENT: - return true; + return state != TabletState.HOSTED; case SUSPENDED: + return state != TabletState.SUSPENDED; case UNASSIGNED: - if (shouldBeOnline && (tm.getHostingGoal() == TabletHostingGoal.ALWAYS - || (tm.getHostingGoal() == TabletHostingGoal.ONDEMAND && tm.getHostingRequested()))) { - return true; - } - // If the Tablet has walogs and operation id then need to return so - // TGW can bring online to process the logs - if (!tm.getLogs().isEmpty() && tm.getOperationId() != null - && tm.getOperationId().getType() == TabletOperationType.MERGING) { - return true; - } - break; + return state != TabletState.UNASSIGNED; default: - throw new AssertionError("Inconceivable! The tablet is an unrecognized state: " + state); + throw new IllegalStateException("unknown goal state " + goalState); } - return false; } - public static void configureScanner(final ScannerBase scanner, final CurrentState state) { + public static void configureScanner(final ScannerBase scanner, + final TabletManagementParameters tabletMgmtParams) { // TODO so many columns are being fetch it may not make sense to fetch columns TabletColumnFamily.PREV_ROW_COLUMN.fetch(scanner); ServerColumnFamily.DIRECTORY_COLUMN.fetch(scanner); @@ -311,17 +134,7 @@ public static void configureScanner(final ScannerBase scanner, final CurrentStat scanner.addScanIterator(new IteratorSetting(1000, "wholeRows", WholeRowIterator.class)); IteratorSetting tabletChange = new IteratorSetting(1001, "ManagerTabletInfoIterator", TabletManagementIterator.class); - if (state != null) { - LiveTServersSnapshot tserversSnapshot = state.tserversSnapshot(); - TabletManagementIterator.setCurrentServers(tabletChange, tserversSnapshot.getTservers()); - TabletManagementIterator.setOnlineTables(tabletChange, state.onlineTables()); - TabletManagementIterator.setMigrations(tabletChange, state.migrationsSnapshot()); - TabletManagementIterator.setManagerState(tabletChange, state.getManagerState()); - TabletManagementIterator.setShuttingDown(tabletChange, state.shutdownServers()); - TabletManagementIterator.setTServerResourceGroups(tabletChange, - tserversSnapshot.getTserverGroups()); - setCompactionHints(tabletChange, state.getCompactionHints()); - } + tabletChange.addOption(TABLET_GOAL_STATE_PARAMS_OPTION, tabletMgmtParams.serialize()); scanner.addScanIterator(tabletChange); } @@ -329,38 +142,20 @@ public static TabletManagement decode(Entry e) throws IOException { return new TabletManagement(e.getKey(), e.getValue()); } - private final Set current = new HashSet<>(); - private final Set onlineTables = new HashSet<>(); - private final Map tserverResourceGroups = new HashMap<>(); - private final Set migrations = new HashSet<>(); - private ManagerState managerState = ManagerState.NORMAL; private IteratorEnvironment env; private Key topKey = null; private Value topValue = null; + private TabletManagementParameters tabletMgmtParams = null; @Override public void init(SortedKeyValueIterator source, Map options, IteratorEnvironment env) throws IOException { super.init(source, options, env); this.env = env; - current.addAll(parseServers(options.get(SERVERS_OPTION))); - onlineTables.addAll(parseTableIDs(options.get(TABLES_OPTION))); - tserverResourceGroups.putAll(parseTServerResourceGroups(options)); - migrations.addAll(parseMigrations(options.get(MIGRATIONS_OPTION))); - String managerStateOptionValue = options.get(MANAGER_STATE_OPTION); - try { - managerState = ManagerState.valueOf(managerStateOptionValue); - } catch (RuntimeException ex) { - if (managerStateOptionValue != null) { - LOG.error("Unable to decode managerState {}", managerStateOptionValue); - } - } - Set shuttingDown = parseServers(options.get(SHUTTING_DOWN_OPTION)); - if (shuttingDown != null) { - current.removeAll(shuttingDown); - } - compactionGenerator = new CompactionJobGenerator(env.getPluginEnv(), - parseCompactionHints(options.get(COMPACTION_HINTS_OPTIONS))); + tabletMgmtParams = + TabletManagementParameters.deserialize(options.get(TABLET_GOAL_STATE_PARAMS_OPTION)); + compactionGenerator = + new CompactionJobGenerator(env.getPluginEnv(), tabletMgmtParams.getCompactionHints()); final AccumuloConfiguration conf = new ConfigurationCopy(env.getPluginEnv().getConfiguration()); BalancerEnvironmentImpl benv = new BalancerEnvironmentImpl(((TabletIteratorEnvironment) env).getServerContext()); @@ -400,7 +195,9 @@ protected void consume() throws IOException { actions.clear(); Exception error = null; try { - if (managerState != ManagerState.NORMAL || current.isEmpty() || onlineTables.isEmpty()) { + if (tabletMgmtParams.getManagerState() != ManagerState.NORMAL + || tabletMgmtParams.getOnlineTsevers().isEmpty() + || tabletMgmtParams.getOnlineTables().isEmpty()) { // when manager is in the process of starting up or shutting down return everything. actions.add(ManagementAction.NEEDS_LOCATION_UPDATE); } else { @@ -449,7 +246,7 @@ private void computeTabletManagementActions(final TabletMetadata tm, return; } - if (shouldReturnDueToLocation(tm, onlineTables, current)) { + if (shouldReturnDueToLocation(tm)) { reasonsToReturnThisTablet.add(ManagementAction.NEEDS_LOCATION_UPDATE); } diff --git a/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementParameters.java b/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementParameters.java new file mode 100644 index 00000000000..36f880e5d26 --- /dev/null +++ b/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementParameters.java @@ -0,0 +1,252 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.accumulo.server.manager.state; + +import static java.util.stream.Collectors.toList; +import static java.util.stream.Collectors.toMap; +import static java.util.stream.Collectors.toSet; +import static java.util.stream.Collectors.toUnmodifiableMap; +import static java.util.stream.Collectors.toUnmodifiableSet; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Arrays; +import java.util.Base64; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.function.Supplier; + +import org.apache.accumulo.core.data.AbstractId; +import org.apache.accumulo.core.data.TableId; +import org.apache.accumulo.core.dataImpl.KeyExtent; +import org.apache.accumulo.core.manager.thrift.ManagerState; +import org.apache.accumulo.core.metadata.TServerInstance; +import org.apache.accumulo.core.metadata.schema.Ample; +import org.apache.accumulo.server.manager.LiveTServerSet; +import org.apache.hadoop.io.DataInputBuffer; +import org.apache.hadoop.io.DataOutputBuffer; + +import com.google.common.base.Suppliers; +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; + +/** + * An immutable snapshot of the information needed by the TabletGroupWatcher and the + * {@link TabletManagementIterator} to make decisions about tablets. + */ +public class TabletManagementParameters { + // ELASTICITY_TODO need to unit test serialization and deserialization of this class. + private final ManagerState managerState; + private final Map parentUpgradeMap; + private final Set onlineTables; + private final Set serversToShutdown; + private final Map migrations; + + private final Ample.DataLevel level; + + private final Supplier> resourceGroups; + private final Map> tserverGroups; + private final Map> compactionHints; + private final Set onlineTservers; + private final boolean canSuspendTablets; + + public TabletManagementParameters(ManagerState managerState, + Map parentUpgradeMap, Set onlineTables, + LiveTServerSet.LiveTServersSnapshot liveTServersSnapshot, + Set serversToShutdown, Map migrations, + Ample.DataLevel level, Map> compactionHints, + boolean canSuspendTablets) { + this.managerState = managerState; + this.parentUpgradeMap = Map.copyOf(parentUpgradeMap); + // TODO could filter by level + this.onlineTables = Set.copyOf(onlineTables); + // This is already immutable, so no need to copy + this.onlineTservers = liveTServersSnapshot.getTservers(); + this.serversToShutdown = Set.copyOf(serversToShutdown); + // TODO could filter by level + this.migrations = Map.copyOf(migrations); + this.level = level; + // This is already immutable, so no need to copy + this.tserverGroups = liveTServersSnapshot.getTserverGroups(); + this.compactionHints = makeImmutable(compactionHints); + this.resourceGroups = Suppliers.memoize(() -> { + Map resourceGroups = new HashMap<>(); + TabletManagementParameters.this.tserverGroups.forEach((resourceGroup, tservers) -> tservers + .forEach(tserver -> resourceGroups.put(tserver, resourceGroup))); + return Map.copyOf(resourceGroups); + }); + this.canSuspendTablets = canSuspendTablets; + } + + private TabletManagementParameters(JsonData jdata) { + this.managerState = jdata.managerState; + this.parentUpgradeMap = Map.copyOf(jdata.parentUpgradeMap); + this.onlineTables = jdata.onlineTables.stream().map(TableId::of).collect(toUnmodifiableSet()); + this.onlineTservers = + jdata.onlineTservers.stream().map(TServerInstance::new).collect(toUnmodifiableSet()); + this.serversToShutdown = + jdata.serversToShutdown.stream().map(TServerInstance::new).collect(toUnmodifiableSet()); + this.migrations = jdata.migrations.entrySet().stream() + .collect(toUnmodifiableMap(entry -> JsonData.strToExtent(entry.getKey()), + entry -> new TServerInstance(entry.getValue()))); + this.level = jdata.level; + this.compactionHints = makeImmutable(jdata.compactionHints); + this.tserverGroups = jdata.tserverGroups.entrySet().stream().collect(toUnmodifiableMap( + Map.Entry::getKey, + entry -> entry.getValue().stream().map(TServerInstance::new).collect(toUnmodifiableSet()))); + this.resourceGroups = Suppliers.memoize(() -> { + Map resourceGroups = new HashMap<>(); + TabletManagementParameters.this.tserverGroups.forEach((resourceGroup, tservers) -> tservers + .forEach(tserver -> resourceGroups.put(tserver, resourceGroup))); + return Map.copyOf(resourceGroups); + }); + this.canSuspendTablets = jdata.canSuspendTablets; + ; + } + + public ManagerState getManagerState() { + return managerState; + } + + public boolean isParentLevelUpgraded() { + return parentUpgradeMap.get(level); + } + + public Set getOnlineTsevers() { + return onlineTservers; + } + + public Set getServersToShutdown() { + return serversToShutdown; + } + + public boolean isTableOnline(TableId tableId) { + return onlineTables.contains(tableId); + } + + public Map getMigrations() { + return migrations; + } + + public Ample.DataLevel getLevel() { + return level; + } + + public String getResourceGroup(TServerInstance tserver) { + return resourceGroups.get().get(tserver); + } + + public Map> getGroupedTServers() { + return tserverGroups; + } + + public Set getOnlineTables() { + return onlineTables; + } + + public Map> getCompactionHints() { + return compactionHints; + } + + public boolean canSuspendTablets() { + return canSuspendTablets; + } + + private static Map> + makeImmutable(Map> compactionHints) { + var copy = new HashMap>(); + compactionHints.forEach((ftxid, hints) -> copy.put(ftxid, Map.copyOf(hints))); + return Collections.unmodifiableMap(copy); + } + + private static class JsonData { + final ManagerState managerState; + final Map parentUpgradeMap; + final Collection onlineTables; + final Collection onlineTservers; + final Collection serversToShutdown; + final Map migrations; + + final Ample.DataLevel level; + + final Map> tserverGroups; + + final Map> compactionHints; + + final boolean canSuspendTablets; + + private static String toString(KeyExtent extent) { + DataOutputBuffer buffer = new DataOutputBuffer(); + try { + extent.writeTo(buffer); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + + return Base64.getEncoder() + .encodeToString(Arrays.copyOf(buffer.getData(), buffer.getLength())); + + } + + private static KeyExtent strToExtent(String kes) { + byte[] data = Base64.getDecoder().decode(kes); + DataInputBuffer buffer = new DataInputBuffer(); + buffer.reset(data, data.length); + try { + return KeyExtent.readFrom(buffer); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + JsonData(TabletManagementParameters params) { + managerState = params.managerState; + parentUpgradeMap = params.parentUpgradeMap; + onlineTables = params.onlineTables.stream().map(AbstractId::canonical).collect(toList()); + onlineTservers = params.getOnlineTsevers().stream().map(TServerInstance::getHostPortSession) + .collect(toList()); + serversToShutdown = params.serversToShutdown.stream().map(TServerInstance::getHostPortSession) + .collect(toList()); + migrations = params.migrations.entrySet().stream().collect( + toMap(entry -> toString(entry.getKey()), entry -> entry.getValue().getHostPortSession())); + level = params.level; + tserverGroups = params.getGroupedTServers().entrySet().stream() + .collect(toMap(Map.Entry::getKey, entry -> entry.getValue().stream() + .map(TServerInstance::getHostPortSession).collect(toSet()))); + compactionHints = params.compactionHints; + canSuspendTablets = params.canSuspendTablets; + } + + } + + public String serialize() { + Gson gson = new GsonBuilder().disableHtmlEscaping().create(); + return gson.toJson(new JsonData(this)); + } + + public static TabletManagementParameters deserialize(String json) { + Gson gson = new GsonBuilder().disableHtmlEscaping().create(); + JsonData jdata = gson.fromJson(json, JsonData.class); + return new TabletManagementParameters(jdata); + } + +} diff --git a/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementScanner.java b/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementScanner.java index e34d16304c8..c5a02ef93aa 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementScanner.java +++ b/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementScanner.java @@ -59,8 +59,8 @@ public class TabletManagementScanner implements ClosableIterator ranges, CurrentState state, - String tableName) { + public TabletManagementScanner(ClientContext context, List ranges, + TabletManagementParameters tmgmtParams, String tableName) { // scan over metadata table, looking for tablets in the wrong state based on the live servers // and online tables try { @@ -90,16 +90,11 @@ public TabletManagementScanner(ClientContext context, List ranges, Curren throw new RuntimeException("Error obtaining locations for table: " + tableName); } cleanable = CleanerUtil.unclosed(this, TabletManagementScanner.class, closed, log, mdScanner); - TabletManagementIterator.configureScanner(mdScanner, state); + TabletManagementIterator.configureScanner(mdScanner, tmgmtParams); mdScanner.setRanges(ranges); iter = mdScanner.iterator(); } - // This constructor is called from utilities and tests - public TabletManagementScanner(ClientContext context, Range range, String tableName) { - this(context, List.of(range), null, tableName); - } - @Override public void close() { if (closed.compareAndSet(false, true)) { diff --git a/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletStateStore.java b/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletStateStore.java index 19cc5b5832b..ee81950fe71 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletStateStore.java +++ b/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletStateStore.java @@ -36,7 +36,7 @@ /** * Interface for storing information about tablet assignments. There are three implementations: */ -public interface TabletStateStore extends ClosableIterable { +public interface TabletStateStore { /** * Get the level for this state store @@ -52,14 +52,14 @@ public interface TabletStateStore extends ClosableIterable { * Scan the information about the tablets covered by this store that have end row in the specified * ranges. */ - ClosableIterator iterator(List ranges); + ClosableIterator iterator(List ranges, + TabletManagementParameters parameters); /** * Scan the information about all tablets covered by this store.. */ - @Override - default ClosableIterator iterator() { - return iterator(List.of(MetadataSchema.TabletsSection.getRange())); + default ClosableIterator iterator(TabletManagementParameters parameters) { + return iterator(List.of(MetadataSchema.TabletsSection.getRange()), parameters); } /** @@ -118,11 +118,6 @@ static TabletStateStore getStoreForTablet(KeyExtent extent, ServerContext contex } public static TabletStateStore getStoreForLevel(DataLevel level, ServerContext context) { - return getStoreForLevel(level, context, null); - } - - public static TabletStateStore getStoreForLevel(DataLevel level, ServerContext context, - CurrentState state) { TabletStateStore tss; switch (level) { @@ -130,10 +125,10 @@ public static TabletStateStore getStoreForLevel(DataLevel level, ServerContext c tss = new ZooTabletStateStore(level, context); break; case METADATA: - tss = new RootTabletStateStore(level, context, state); + tss = new RootTabletStateStore(level, context); break; case USER: - tss = new MetaDataStateStore(level, context, state); + tss = new MetaDataStateStore(level, context); break; default: throw new IllegalArgumentException("Unknown level " + level); diff --git a/server/base/src/main/java/org/apache/accumulo/server/manager/state/ZooTabletStateStore.java b/server/base/src/main/java/org/apache/accumulo/server/manager/state/ZooTabletStateStore.java index c88847c5360..05f5648497e 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/manager/state/ZooTabletStateStore.java +++ b/server/base/src/main/java/org/apache/accumulo/server/manager/state/ZooTabletStateStore.java @@ -40,6 +40,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.base.Preconditions; + class ZooTabletStateStore extends AbstractTabletStateStore implements TabletStateStore { private static final Logger log = LoggerFactory.getLogger(ZooTabletStateStore.class); @@ -60,7 +62,9 @@ public DataLevel getLevel() { } @Override - public ClosableIterator iterator(List ranges) { + public ClosableIterator iterator(List ranges, + TabletManagementParameters parameters) { + Preconditions.checkArgument(parameters.getLevel() == getLevel()); return new ClosableIterator<>() { boolean finished = false; diff --git a/server/base/src/main/java/org/apache/accumulo/server/util/FindOfflineTablets.java b/server/base/src/main/java/org/apache/accumulo/server/util/FindOfflineTablets.java index 2cd208a629b..fb28ed13e8d 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/util/FindOfflineTablets.java +++ b/server/base/src/main/java/org/apache/accumulo/server/util/FindOfflineTablets.java @@ -23,25 +23,18 @@ import java.util.concurrent.atomic.AtomicBoolean; import org.apache.accumulo.core.client.TableNotFoundException; -import org.apache.accumulo.core.data.Range; -import org.apache.accumulo.core.data.TableId; -import org.apache.accumulo.core.dataImpl.KeyExtent; -import org.apache.accumulo.core.manager.state.TabletManagement; import org.apache.accumulo.core.manager.state.tables.TableState; import org.apache.accumulo.core.metadata.MetadataTable; import org.apache.accumulo.core.metadata.RootTable; import org.apache.accumulo.core.metadata.TServerInstance; import org.apache.accumulo.core.metadata.TabletState; import org.apache.accumulo.core.metadata.schema.Ample.DataLevel; -import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection; import org.apache.accumulo.core.metadata.schema.TabletMetadata; import org.apache.accumulo.core.trace.TraceUtil; import org.apache.accumulo.server.ServerContext; import org.apache.accumulo.server.cli.ServerUtilOpts; import org.apache.accumulo.server.manager.LiveTServerSet; import org.apache.accumulo.server.manager.LiveTServerSet.Listener; -import org.apache.accumulo.server.manager.state.TabletManagementScanner; -import org.apache.accumulo.server.manager.state.TabletStateStore; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -82,8 +75,8 @@ public void update(LiveTServerSet current, Set deleted, tservers.startListeningForTabletServerChanges(); scanning.set(true); - Iterator zooScanner = - TabletStateStore.getStoreForLevel(DataLevel.ROOT, context).iterator(); + Iterator zooScanner = + context.getAmple().readTablets().forLevel(DataLevel.ROOT).build().iterator(); int offline = 0; @@ -97,8 +90,8 @@ public void update(LiveTServerSet current, Set deleted, } System.out.println("Scanning " + RootTable.NAME); - Iterator rootScanner = - new TabletManagementScanner(context, TabletsSection.getRange(), RootTable.NAME); + Iterator rootScanner = + context.getAmple().readTablets().forLevel(DataLevel.METADATA).build().iterator(); if ((offline = checkTablets(context, rootScanner, tservers)) > 0) { return offline; } @@ -109,32 +102,24 @@ public void update(LiveTServerSet current, Set deleted, System.out.println("Scanning " + MetadataTable.NAME); - Range range = TabletsSection.getRange(); - if (tableName != null) { - TableId tableId = context.getTableId(tableName); - range = new KeyExtent(tableId, null, null).toMetaRange(); - } - - try (TabletManagementScanner metaScanner = - new TabletManagementScanner(context, range, MetadataTable.NAME)) { - return checkTablets(context, metaScanner, tservers); + try (var metaScanner = context.getAmple().readTablets().forLevel(DataLevel.USER).build()) { + return checkTablets(context, metaScanner.iterator(), tservers); } } - private static int checkTablets(ServerContext context, Iterator scanner, + private static int checkTablets(ServerContext context, Iterator scanner, LiveTServerSet tservers) { int offline = 0; while (scanner.hasNext() && !System.out.checkError()) { - TabletManagement mti = scanner.next(); - TabletMetadata tabletMetadata = mti.getTabletMetadata(); + TabletMetadata tabletMetadata = scanner.next(); Set liveTServers = tservers.getCurrentServers(); TabletState state = TabletState.compute(tabletMetadata, liveTServers); if (state != null && state != TabletState.HOSTED - && context.getTableManager().getTableState(mti.getTabletMetadata().getTableId()) + && context.getTableManager().getTableState(tabletMetadata.getTableId()) != TableState.OFFLINE) { - System.out.println( - mti + " is " + state + " #walogs:" + mti.getTabletMetadata().getLogs().size()); + System.out.println(tabletMetadata.getExtent() + " is " + state + " #walogs:" + + tabletMetadata.getLogs().size()); offline++; } } diff --git a/server/base/src/main/java/org/apache/accumulo/server/util/ListOnlineOnDemandTablets.java b/server/base/src/main/java/org/apache/accumulo/server/util/ListOnlineOnDemandTablets.java index 8bf64157b87..70e02e5ebc1 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/util/ListOnlineOnDemandTablets.java +++ b/server/base/src/main/java/org/apache/accumulo/server/util/ListOnlineOnDemandTablets.java @@ -24,19 +24,17 @@ import org.apache.accumulo.core.client.TableNotFoundException; import org.apache.accumulo.core.client.admin.TabletHostingGoal; -import org.apache.accumulo.core.data.Range; -import org.apache.accumulo.core.manager.state.TabletManagement; import org.apache.accumulo.core.metadata.MetadataTable; import org.apache.accumulo.core.metadata.TServerInstance; import org.apache.accumulo.core.metadata.TabletState; -import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection; +import org.apache.accumulo.core.metadata.schema.Ample; import org.apache.accumulo.core.metadata.schema.TabletMetadata; +import org.apache.accumulo.core.metadata.schema.TabletsMetadata; import org.apache.accumulo.core.trace.TraceUtil; import org.apache.accumulo.server.ServerContext; import org.apache.accumulo.server.cli.ServerUtilOpts; import org.apache.accumulo.server.manager.LiveTServerSet; import org.apache.accumulo.server.manager.LiveTServerSet.Listener; -import org.apache.accumulo.server.manager.state.TabletManagementScanner; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -79,27 +77,24 @@ public void update(LiveTServerSet current, Set deleted, System.out.println("Scanning " + MetadataTable.NAME); - Range range = TabletsSection.getRange(); - - try (TabletManagementScanner metaScanner = - new TabletManagementScanner(context, range, MetadataTable.NAME)) { - return checkTablets(context, metaScanner, tservers); + try (TabletsMetadata metaScanner = + context.getAmple().readTablets().forLevel(Ample.DataLevel.USER).build()) { + return checkTablets(context, metaScanner.iterator(), tservers); } } - private static int checkTablets(ServerContext context, Iterator scanner, + private static int checkTablets(ServerContext context, Iterator scanner, LiveTServerSet tservers) { int online = 0; while (scanner.hasNext() && !System.out.checkError()) { - final TabletManagement mti = scanner.next(); - TabletMetadata tabletMetadata = mti.getTabletMetadata(); + TabletMetadata tabletMetadata = scanner.next(); Set liveTServers = tservers.getCurrentServers(); TabletState state = TabletState.compute(tabletMetadata, liveTServers); if (state == TabletState.HOSTED - && mti.getTabletMetadata().getHostingGoal() == TabletHostingGoal.ONDEMAND) { - System.out.println( - mti + " is " + state + " #walogs:" + mti.getTabletMetadata().getLogs().size()); + && tabletMetadata.getHostingGoal() == TabletHostingGoal.ONDEMAND) { + System.out.println(tabletMetadata.getExtent() + " is " + state + " #walogs:" + + tabletMetadata.getLogs().size()); online++; } } diff --git a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectWriteAheadLogs.java b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectWriteAheadLogs.java index f783d77ac5f..fba1867b475 100644 --- a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectWriteAheadLogs.java +++ b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectWriteAheadLogs.java @@ -18,6 +18,12 @@ */ package org.apache.accumulo.gc; +import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.LAST; +import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.LOCATION; +import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.LOGS; +import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.PREV_ROW; +import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.SUSPEND; + import java.io.FileNotFoundException; import java.io.IOException; import java.util.Collection; @@ -31,7 +37,6 @@ import org.apache.accumulo.core.gc.thrift.GCStatus; import org.apache.accumulo.core.gc.thrift.GcCycleStats; -import org.apache.accumulo.core.manager.state.TabletManagement; import org.apache.accumulo.core.metadata.TServerInstance; import org.apache.accumulo.core.metadata.TabletState; import org.apache.accumulo.core.metadata.schema.Ample.DataLevel; @@ -45,7 +50,6 @@ import org.apache.accumulo.server.log.WalStateManager.WalMarkerException; import org.apache.accumulo.server.log.WalStateManager.WalState; import org.apache.accumulo.server.manager.LiveTServerSet; -import org.apache.accumulo.server.manager.state.TabletStateStore; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.slf4j.Logger; @@ -64,7 +68,7 @@ public class GarbageCollectWriteAheadLogs { private final VolumeManager fs; private final LiveTServerSet liveServers; private final WalStateManager walMarker; - private final Iterable store; + private final Iterable store; /** * Creates a new GC WAL object. @@ -79,9 +83,12 @@ public class GarbageCollectWriteAheadLogs { this.liveServers = liveServers; this.walMarker = new WalStateManager(context); this.store = () -> Iterators.concat( - TabletStateStore.getStoreForLevel(DataLevel.ROOT, context).iterator(), - TabletStateStore.getStoreForLevel(DataLevel.METADATA, context).iterator(), - TabletStateStore.getStoreForLevel(DataLevel.USER, context).iterator()); + context.getAmple().readTablets().forLevel(DataLevel.ROOT) + .fetch(LOCATION, LAST, LOGS, PREV_ROW, SUSPEND).checkConsistency().build().iterator(), + context.getAmple().readTablets().forLevel(DataLevel.METADATA) + .fetch(LOCATION, LAST, LOGS, PREV_ROW, SUSPEND).checkConsistency().build().iterator(), + context.getAmple().readTablets().forLevel(DataLevel.USER) + .fetch(LOCATION, LAST, LOGS, PREV_ROW, SUSPEND).checkConsistency().build().iterator()); } /** @@ -93,7 +100,7 @@ public class GarbageCollectWriteAheadLogs { */ @VisibleForTesting GarbageCollectWriteAheadLogs(ServerContext context, VolumeManager fs, - LiveTServerSet liveTServerSet, WalStateManager walMarker, Iterable store) { + LiveTServerSet liveTServerSet, WalStateManager walMarker, Iterable store) { this.context = context; this.fs = fs; this.liveServers = liveTServerSet; @@ -275,13 +282,11 @@ private Map removeEntriesInUse(Map idsToIgnore = - candidates.remove(mti.getTabletMetadata().getLocation().getServerInstance()); + Set idsToIgnore = candidates.remove(tabletMetadata.getLocation().getServerInstance()); if (idsToIgnore != null) { result.keySet().removeAll(idsToIgnore); recoveryLogs.keySet().removeAll(idsToIgnore); @@ -289,7 +294,7 @@ private Map removeEntriesInUse(Map walogs = Collections.emptyList(); - private final TabletManagement tabletAssignedToServer1; - private final TabletManagement tabletAssignedToServer2; + private final TabletMetadata tabletAssignedToServer1; + private final TabletMetadata tabletAssignedToServer2; { try { - tabletAssignedToServer1 = new TabletManagement(Set.of(), + tabletAssignedToServer1 = TabletMetadata.builder(extent).putLocation(Location.current(server1)) - .putHostingGoal(TabletHostingGoal.ALWAYS).build(LAST, SUSPEND, LOGS), - ""); - tabletAssignedToServer2 = new TabletManagement(Set.of(), + .putHostingGoal(TabletHostingGoal.ALWAYS).build(LAST, SUSPEND, LOGS); + tabletAssignedToServer2 = TabletMetadata.builder(extent).putLocation(Location.current(server2)) - .putHostingGoal(TabletHostingGoal.NEVER).build(LAST, SUSPEND, LOGS), - ""); + .putHostingGoal(TabletHostingGoal.NEVER).build(LAST, SUSPEND, LOGS); } catch (Exception ex) { throw new RuntimeException(ex); } } - private final Iterable tabletOnServer1List = + private final Iterable tabletOnServer1List = Collections.singletonList(tabletAssignedToServer1); - private final Iterable tabletOnServer2List = + private final Iterable tabletOnServer2List = Collections.singletonList(tabletAssignedToServer2); @Test diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java index b5de7196d0d..6608b566381 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java @@ -99,7 +99,6 @@ import org.apache.accumulo.core.metadata.TServerInstance; import org.apache.accumulo.core.metadata.schema.Ample.DataLevel; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily; -import org.apache.accumulo.core.metadata.schema.TabletMetadata; import org.apache.accumulo.core.metrics.MetricsUtil; import org.apache.accumulo.core.security.Authorizations; import org.apache.accumulo.core.spi.balancer.BalancerEnvironment; @@ -108,7 +107,6 @@ import org.apache.accumulo.core.spi.balancer.data.TServerStatus; import org.apache.accumulo.core.spi.balancer.data.TabletMigration; import org.apache.accumulo.core.spi.balancer.data.TabletServerId; -import org.apache.accumulo.core.tablet.thrift.TUnloadTabletGoal; import org.apache.accumulo.core.trace.TraceUtil; import org.apache.accumulo.core.util.Halt; import org.apache.accumulo.core.util.Retry; @@ -132,7 +130,6 @@ import org.apache.accumulo.server.manager.LiveTServerSet.LiveTServersSnapshot; import org.apache.accumulo.server.manager.LiveTServerSet.TServerConnection; import org.apache.accumulo.server.manager.balancer.BalancerEnvironmentImpl; -import org.apache.accumulo.server.manager.state.CurrentState; import org.apache.accumulo.server.manager.state.DeadServerList; import org.apache.accumulo.server.manager.state.TabletServerState; import org.apache.accumulo.server.manager.state.TabletStateStore; @@ -174,7 +171,7 @@ * The manager will also coordinate log recoveries and reports general status. */ public class Manager extends AbstractServer - implements LiveTServerSet.Listener, TableObserver, CurrentState, HighlyAvailableService { + implements LiveTServerSet.Listener, TableObserver, HighlyAvailableService { static final Logger log = LoggerFactory.getLogger(Manager.class); @@ -236,12 +233,14 @@ public class Manager extends AbstractServer private final TabletStateStore metadataTabletStore; private final TabletStateStore userTabletStore; - @Override public synchronized ManagerState getManagerState() { return state; } - @Override + // ELASTICITIY_TODO it would be nice if this method could take DataLevel as an argument and only + // retrieve information about compactions in that data level. Attempted this and a lot of + // refactoring was needed to get that small bit of information to this method. Would be best to + // address this after issue. May be best to attempt this after #3576. public Map> getCompactionHints() { Map allConfig = null; try { @@ -455,9 +454,9 @@ public static void main(String[] args) throws Exception { final long tokenLifetime = aconf.getTimeInMillis(Property.GENERAL_DELEGATION_TOKEN_LIFETIME); - this.rootTabletStore = TabletStateStore.getStoreForLevel(DataLevel.ROOT, context, this); - this.metadataTabletStore = TabletStateStore.getStoreForLevel(DataLevel.METADATA, context, this); - this.userTabletStore = TabletStateStore.getStoreForLevel(DataLevel.USER, context, this); + this.rootTabletStore = TabletStateStore.getStoreForLevel(DataLevel.ROOT, context); + this.metadataTabletStore = TabletStateStore.getStoreForLevel(DataLevel.METADATA, context); + this.userTabletStore = TabletStateStore.getStoreForLevel(DataLevel.USER, context); authenticationTokenKeyManager = null; keyDistributor = null; @@ -543,110 +542,8 @@ public CompactionJobQueues getCompactionQueues() { return compactionJobQueues; } - enum TabletGoalState { - HOSTED(TUnloadTabletGoal.UNKNOWN), - UNASSIGNED(TUnloadTabletGoal.UNASSIGNED), - DELETED(TUnloadTabletGoal.DELETED), - SUSPENDED(TUnloadTabletGoal.SUSPENDED); - - private final TUnloadTabletGoal unloadGoal; - - TabletGoalState(TUnloadTabletGoal unloadGoal) { - this.unloadGoal = unloadGoal; - } - - /** The purpose of unloading this tablet. */ - public TUnloadTabletGoal howUnload() { - return unloadGoal; - } - } - - TabletGoalState getSystemGoalState(TabletMetadata tm) { - switch (getManagerState()) { - case NORMAL: - return TabletGoalState.HOSTED; - case HAVE_LOCK: // fall-through intended - case INITIAL: // fall-through intended - case SAFE_MODE: - if (tm.getExtent().isMeta()) { - return TabletGoalState.HOSTED; - } - return TabletGoalState.UNASSIGNED; - case UNLOAD_METADATA_TABLETS: - if (tm.getExtent().isRootTablet()) { - return TabletGoalState.HOSTED; - } - return TabletGoalState.UNASSIGNED; - case UNLOAD_ROOT_TABLET: - return TabletGoalState.UNASSIGNED; - case STOP: - return TabletGoalState.UNASSIGNED; - default: - throw new IllegalStateException("Unknown Manager State"); - } - } - - TabletGoalState getTableGoalState(TabletMetadata tm) { - TableState tableState = getContext().getTableManager().getTableState(tm.getTableId()); - if (tableState == null) { - return TabletGoalState.DELETED; - } - switch (tableState) { - case DELETING: - return TabletGoalState.DELETED; - case OFFLINE: - case NEW: - return TabletGoalState.UNASSIGNED; - default: - switch (tm.getHostingGoal()) { - case ALWAYS: - return TabletGoalState.HOSTED; - case NEVER: - return TabletGoalState.UNASSIGNED; - case ONDEMAND: - if (tm.getHostingRequested()) { - return TabletGoalState.HOSTED; - } else { - return TabletGoalState.UNASSIGNED; - } - default: - throw new IllegalStateException( - "Tablet Hosting Goal is unhandled: " + tm.getHostingGoal()); - } - } - } - - TabletGoalState getGoalState(TabletMetadata tm) { - KeyExtent extent = tm.getExtent(); - // Shutting down? - TabletGoalState state = getSystemGoalState(tm); - - if (state == TabletGoalState.HOSTED) { - if (!upgradeCoordinator.getStatus().isParentLevelUpgraded(extent)) { - // The place where this tablet stores its metadata was not upgraded, so do not assign this - // tablet yet. - return TabletGoalState.UNASSIGNED; - } - - if (tm.getOperationId() != null) { - return TabletGoalState.UNASSIGNED; - } - - if (tm.hasCurrent() && serversToShutdown.contains(tm.getLocation().getServerInstance())) { - return TabletGoalState.SUSPENDED; - } - - // taking table offline? - state = getTableGoalState(tm); - if (state == TabletGoalState.HOSTED) { - // Maybe this tablet needs to be migrated - TServerInstance dest = migrations.get(extent); - if (dest != null && tm.hasCurrent() && !dest.equals(tm.getLocation().getServerInstance())) { - return TabletGoalState.UNASSIGNED; - } - } - } - return state; + public UpgradeCoordinator.UpgradeStatus getUpgradeStatus() { + return upgradeCoordinator.getStatus(); } private class MigrationCleanupThread implements Runnable { @@ -878,7 +775,7 @@ private void checkForHeldServer(SortedMap ts private long balanceTablets() { BalanceParamsImpl params = BalanceParamsImpl.fromThrift(tserverStatusForBalancer, - tServerGroupingForBalancer, tserverStatus, migrationsSnapshot()); + tServerGroupingForBalancer, tserverStatus, migrationsSnapshot().keySet()); long wait = tabletBalancer.balance(params); for (TabletMigration m : checkMigrationSanity(tserverStatusForBalancer.keySet(), @@ -1580,7 +1477,6 @@ public void initialize() {} @Override public void sessionExpired() {} - @Override public Set onlineTables() { Set result = new HashSet<>(); if (getManagerState() != ManagerState.NORMAL) { @@ -1604,12 +1500,10 @@ public Set onlineTables() { return result; } - @Override public Set onlineTabletServers() { return tserverSet.getSnapshot().getTservers(); } - @Override public LiveTServersSnapshot tserversSnapshot() { return tserverSet.getSnapshot(); } @@ -1695,19 +1589,15 @@ public boolean delegationTokensAvailable() { return delegationTokensAvailable; } - @Override - public Set migrationsSnapshot() { - Set migrationKeys; + public Map migrationsSnapshot() { synchronized (migrations) { - migrationKeys = new HashSet<>(migrations.keySet()); + return Map.copyOf(migrations); } - return Collections.unmodifiableSet(migrationKeys); } - @Override public Set shutdownServers() { synchronized (serversToShutdown) { - return new HashSet<>(serversToShutdown); + return Set.copyOf(serversToShutdown); } } diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java index 0b7902bd46a..3031590ec43 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java @@ -51,7 +51,6 @@ import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.dataImpl.KeyExtent; import org.apache.accumulo.core.logging.TabletLogger; -import org.apache.accumulo.core.manager.balancer.TabletServerIdImpl; import org.apache.accumulo.core.manager.state.TabletManagement; import org.apache.accumulo.core.manager.state.TabletManagement.ManagementAction; import org.apache.accumulo.core.manager.state.tables.TableState; @@ -62,33 +61,32 @@ import org.apache.accumulo.core.metadata.RootTable; import org.apache.accumulo.core.metadata.TServerInstance; import org.apache.accumulo.core.metadata.TabletState; -import org.apache.accumulo.core.metadata.schema.Ample.DataLevel; +import org.apache.accumulo.core.metadata.schema.Ample; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.CurrentLocationColumnFamily; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.FutureLocationColumnFamily; import org.apache.accumulo.core.metadata.schema.TabletMetadata; import org.apache.accumulo.core.metadata.schema.TabletMetadata.Location; -import org.apache.accumulo.core.metadata.schema.TabletOperationType; import org.apache.accumulo.core.security.Authorizations; -import org.apache.accumulo.core.spi.balancer.data.TabletServerId; import org.apache.accumulo.core.util.TextUtil; import org.apache.accumulo.core.util.threads.Threads; import org.apache.accumulo.core.util.threads.Threads.AccumuloDaemonThread; -import org.apache.accumulo.manager.Manager.TabletGoalState; import org.apache.accumulo.manager.metrics.ManagerMetrics; import org.apache.accumulo.manager.split.SplitTask; import org.apache.accumulo.manager.state.TableCounts; import org.apache.accumulo.manager.state.TableStats; +import org.apache.accumulo.manager.upgrade.UpgradeCoordinator; import org.apache.accumulo.server.ServiceEnvironmentImpl; import org.apache.accumulo.server.compaction.CompactionJobGenerator; import org.apache.accumulo.server.conf.TableConfiguration; import org.apache.accumulo.server.log.WalStateManager; import org.apache.accumulo.server.log.WalStateManager.WalMarkerException; -import org.apache.accumulo.server.manager.LiveTServerSet; import org.apache.accumulo.server.manager.LiveTServerSet.TServerConnection; import org.apache.accumulo.server.manager.state.Assignment; import org.apache.accumulo.server.manager.state.ClosableIterator; import org.apache.accumulo.server.manager.state.DistributedStoreException; +import org.apache.accumulo.server.manager.state.TabletGoalState; import org.apache.accumulo.server.manager.state.TabletManagementIterator; +import org.apache.accumulo.server.manager.state.TabletManagementParameters; import org.apache.accumulo.server.manager.state.TabletStateStore; import org.apache.accumulo.server.manager.state.UnassignedTablet; import org.apache.hadoop.fs.Path; @@ -126,8 +124,8 @@ public Text getEncodedEndRow() { private SortedSet lastScanServers = Collections.emptySortedSet(); private final EventHandler eventHandler; private final ManagerMetrics metrics; - private WalStateManager walStateManager; + private volatile Set filteredServersToShutdown = Set.of(); TabletGroupWatcher(Manager manager, TabletStateStore store, TabletGroupWatcher dependentWatcher, ManagerMetrics metrics) { @@ -174,32 +172,31 @@ private static class TabletLists { private final SortedMap destinations; private final Map> currentTServerGrouping; - public TabletLists(Manager m, SortedMap curTServers, - Map> grouping) { - synchronized (m.serversToShutdown) { - var destinationsMod = new TreeMap<>(curTServers); - if (!m.serversToShutdown.isEmpty()) { - // Remove servers that are in the process of shutting down from the lists of tablet - // servers. - destinationsMod.keySet().removeAll(m.serversToShutdown); - HashMap> groupingCopy = new HashMap<>(); - grouping.forEach((group, groupsServers) -> { - if (Collections.disjoint(groupsServers, m.serversToShutdown)) { - groupingCopy.put(group, groupsServers); - } else { - var serversCopy = new HashSet<>(groupsServers); - serversCopy.removeAll(m.serversToShutdown); - groupingCopy.put(group, Collections.unmodifiableSet(serversCopy)); - } - }); - - this.currentTServerGrouping = Collections.unmodifiableMap(groupingCopy); - } else { - this.currentTServerGrouping = grouping; - } + public TabletLists(SortedMap curTServers, + Map> grouping, Set serversToShutdown) { + + var destinationsMod = new TreeMap<>(curTServers); + if (!serversToShutdown.isEmpty()) { + // Remove servers that are in the process of shutting down from the lists of tablet + // servers. + destinationsMod.keySet().removeAll(serversToShutdown); + HashMap> groupingCopy = new HashMap<>(); + grouping.forEach((group, groupsServers) -> { + if (Collections.disjoint(groupsServers, serversToShutdown)) { + groupingCopy.put(group, groupsServers); + } else { + var serversCopy = new HashSet<>(groupsServers); + serversCopy.removeAll(serversToShutdown); + groupingCopy.put(group, Collections.unmodifiableSet(serversCopy)); + } + }); - this.destinations = Collections.unmodifiableSortedMap(destinationsMod); + this.currentTServerGrouping = Collections.unmodifiableMap(groupingCopy); + } else { + this.currentTServerGrouping = grouping; } + + this.destinations = Collections.unmodifiableSortedMap(destinationsMod); } public void reset() { @@ -241,17 +238,17 @@ public void run() { continue; } - LiveTServerSet.LiveTServersSnapshot tservers = manager.tserverSet.getSnapshot(); - var currentTservers = getTserversStatus(tservers.getTservers()); + TabletManagementParameters tabletMgmtParams = createTabletManagementParameters(); + var currentTservers = getCurrentTservers(tabletMgmtParams.getOnlineTsevers()); if (currentTservers.isEmpty()) { setNeedsFullScan(); continue; } - try (var iter = store.iterator(ranges)) { + try (var iter = store.iterator(ranges, tabletMgmtParams)) { long t1 = System.currentTimeMillis(); - manageTablets(iter, currentTservers, tservers.getTserverGroups(), false); + manageTablets(iter, tabletMgmtParams, currentTservers, false); long t2 = System.currentTimeMillis(); Manager.log.debug(String.format("[%s]: partial scan time %.2f seconds for %,d ranges", store.name(), (t2 - t1) / 1000., ranges.size())); @@ -315,20 +312,49 @@ synchronized void waitForFullScan(long millis) { } } + private TabletManagementParameters createTabletManagementParameters() { + + HashMap parentLevelUpgrade = new HashMap<>(); + UpgradeCoordinator.UpgradeStatus upgradeStatus = manager.getUpgradeStatus(); + for (var level : Ample.DataLevel.values()) { + parentLevelUpgrade.put(level, upgradeStatus.isParentLevelUpgraded(level)); + } + + Set shutdownServers; + if (store.getLevel() == Ample.DataLevel.USER) { + shutdownServers = manager.shutdownServers(); + } else { + // Use the servers to shutdown filtered by the dependent watcher. These are servers to + // shutdown that the dependent watcher has determined it has no tablets hosted on or assigned + // to. + shutdownServers = dependentWatcher.getFilteredServersToShutdown(); + } + + var tServersSnapshot = manager.tserversSnapshot(); + + return new TabletManagementParameters(manager.getManagerState(), parentLevelUpgrade, + manager.onlineTables(), tServersSnapshot, shutdownServers, manager.migrationsSnapshot(), + store.getLevel(), manager.getCompactionHints(), canSuspendTablets()); + } + + private Set getFilteredServersToShutdown() { + return filteredServersToShutdown; + } + private static class TableMgmtStats { int[] counts = new int[TabletState.values().length]; private int totalUnloaded; } private TableMgmtStats manageTablets(Iterator iter, - SortedMap currentTServers, - Map> tserverGroups, boolean isFullScan) + TabletManagementParameters tableMgmtParams, + SortedMap currentTServers, boolean isFullScan) throws BadLocationStateException, TException, DistributedStoreException, WalMarkerException, IOException { TableMgmtStats tableMgmtStats = new TableMgmtStats(); final boolean shuttingDownAllTabletServers = - manager.serversToShutdown.equals(currentTServers.keySet()); + tableMgmtParams.getServersToShutdown().equals(currentTServers.keySet()); if (shuttingDownAllTabletServers && !isFullScan) { // If we are shutting down all of the TabletServers, then don't process any events // from the EventCoordinator. @@ -338,16 +364,14 @@ private TableMgmtStats manageTablets(Iterator iter, int unloaded = 0; - TabletLists tLists = new TabletLists(manager, currentTServers, tserverGroups); + TabletLists tLists = new TabletLists(currentTServers, tableMgmtParams.getGroupedTServers(), + tableMgmtParams.getServersToShutdown()); CompactionJobGenerator compactionGenerator = new CompactionJobGenerator( - new ServiceEnvironmentImpl(manager.getContext()), manager.getCompactionHints()); + new ServiceEnvironmentImpl(manager.getContext()), tableMgmtParams.getCompactionHints()); - final Map resourceGroups = new HashMap<>(); - tserverGroups.forEach((group, tservers) -> { - tservers.stream().map(TabletServerIdImpl::new) - .forEach(tabletServerId -> resourceGroups.put(tabletServerId, group)); - }); + Set filteredServersToShutdown = + new HashSet<>(tableMgmtParams.getServersToShutdown()); while (iter.hasNext()) { final TabletManagement mti = iter.next(); @@ -391,9 +415,12 @@ private TableMgmtStats manageTablets(Iterator iter, final TableConfiguration tableConf = manager.getContext().getTableConfiguration(tableId); - TabletGoalState goal = manager.getGoalState(tm); - TabletState state = - TabletState.compute(tm, currentTServers.keySet(), manager.tabletBalancer, resourceGroups); + final TabletState state = TabletState.compute(tm, currentTServers.keySet()); + // This is final because nothing in this method should change the goal. All computation of the + // goal should be done in TabletGoalState.compute() so that all parts of the Accumulo code + // will compute a consistent goal. + final TabletGoalState goal = + TabletGoalState.compute(tm, state, manager.tabletBalancer, tableMgmtParams); final Location location = tm.getLocation(); Location current = null; @@ -411,68 +438,14 @@ private TableMgmtStats manageTablets(Iterator iter, stats.update(tableId, state); } - // Always follow through with assignments - if (state == TabletState.ASSIGNED) { - goal = TabletGoalState.HOSTED; - } else if (state == TabletState.NEEDS_REASSIGNMENT) { - goal = TabletGoalState.UNASSIGNED; - } - - if (tm.getOperationId() != null) { - // If there are still wals the tablet needs to be hosted - // to process the wals before starting the merge op - if (!tm.getLogs().isEmpty() - && tm.getOperationId().getType() == TabletOperationType.MERGING) { - goal = TabletGoalState.HOSTED; - } else { - goal = TabletGoalState.UNASSIGNED; - } - } - if (Manager.log.isTraceEnabled()) { Manager.log.trace( "[{}] Shutting down all Tservers: {}, dependentCount: {} Extent: {}, state: {}, goal: {} actions:{}", - store.name(), manager.serversToShutdown.equals(currentTServers.keySet()), + store.name(), tableMgmtParams.getServersToShutdown().equals(currentTServers.keySet()), dependentWatcher == null ? "null" : dependentWatcher.assignedOrHosted(), tm.getExtent(), state, goal, actions); } - // if we are shutting down all the tabletservers, we have to do it in order - if (shuttingDownAllTabletServers - && (goal == TabletGoalState.SUSPENDED && state == TabletState.HOSTED)) { - if (dependentWatcher != null) { - // If the dependentWatcher is for the user tables, check to see - // that user tables exist. - DataLevel dependentLevel = dependentWatcher.store.getLevel(); - boolean userTablesExist = true; - switch (dependentLevel) { - case USER: - Set onlineTables = manager.onlineTables(); - onlineTables.remove(RootTable.ID); - onlineTables.remove(MetadataTable.ID); - userTablesExist = !onlineTables.isEmpty(); - break; - case METADATA: - case ROOT: - default: - break; - } - // If the stats object in the dependentWatcher is empty, then it - // currently does not have data about what is hosted or not. In - // that case host these tablets until the dependent watcher can - // gather some data. - final Map stats = dependentWatcher.getStats(); - if (dependentLevel == DataLevel.USER) { - if (userTablesExist - && (stats == null || stats.isEmpty() || assignedOrHosted(stats) > 0)) { - goal = TabletGoalState.HOSTED; - } - } else if (stats == null || stats.isEmpty() || assignedOrHosted(stats) > 0) { - goal = TabletGoalState.HOSTED; - } - } - } - if (actions.contains(ManagementAction.NEEDS_SPLITTING)) { LOG.debug("{} may need splitting.", tm.getExtent()); if (manager.getSplitter().isSplittable(tm)) { @@ -502,6 +475,11 @@ private TableMgmtStats manageTablets(Iterator iter, // metadata scan could remove any tablets that were not updated during the scan. if (actions.contains(ManagementAction.NEEDS_LOCATION_UPDATE)) { + + if (tm.getLocation() != null) { + filteredServersToShutdown.remove(tm.getLocation().getServerInstance()); + } + if (goal == TabletGoalState.HOSTED) { if ((state != TabletState.HOSTED && !tm.getLogs().isEmpty()) && manager.recoveryManager.recoverLogs(tm.getExtent(), tm.getLogs())) { @@ -544,7 +522,6 @@ private TableMgmtStats manageTablets(Iterator iter, case ASSIGNED_TO_DEAD_SERVER: unassignDeadTablet(tLists, tm); break; - case NEEDS_REASSIGNMENT: case HOSTED: TServerConnection client = manager.tserverSet.getConnection(location.getServerInstance()); @@ -568,14 +545,19 @@ private TableMgmtStats manageTablets(Iterator iter, } flushChanges(tLists); + + if (isFullScan) { + this.filteredServersToShutdown = Set.copyOf(filteredServersToShutdown); + } + return tableMgmtStats; } private SortedMap - getTserversStatus(Set currentServers) { + getCurrentTservers(Set onlineTservers) { // Get the current status for the current list of tservers final SortedMap currentTServers = new TreeMap<>(); - for (TServerInstance entry : currentServers) { + for (TServerInstance entry : onlineTservers) { currentTServers.put(entry, manager.tserverStatus.get(entry)); } return currentTServers; @@ -594,8 +576,8 @@ public void run() { final long waitTimeBetweenScans = manager.getConfiguration() .getTimeInMillis(Property.MANAGER_TABLET_GROUP_WATCHER_INTERVAL); - LiveTServerSet.LiveTServersSnapshot tservers = manager.tserverSet.getSnapshot(); - var currentTServers = getTserversStatus(tservers.getTservers()); + TabletManagementParameters tableMgmtParams = createTabletManagementParameters(); + var currentTServers = getCurrentTservers(tableMgmtParams.getOnlineTsevers()); ClosableIterator iter = null; try { @@ -609,15 +591,14 @@ public void run() { stats.begin(); - ManagerState managerState = manager.getManagerState(); + ManagerState managerState = tableMgmtParams.getManagerState(); // Clear the need for a full scan before starting a full scan inorder to detect events that // happen during the full scan. eventHandler.clearNeedsFullScan(); - iter = store.iterator(); - var tabletMgmtStats = - manageTablets(iter, currentTServers, tservers.getTserverGroups(), true); + iter = store.iterator(tableMgmtParams); + var tabletMgmtStats = manageTablets(iter, tableMgmtParams, currentTServers, true); // provide stats after flushing changes to avoid race conditions w/ delete table stats.end(managerState); diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/delete/CleanUp.java b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/delete/CleanUp.java index c0d60c82346..0b0512b0722 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/delete/CleanUp.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/delete/CleanUp.java @@ -18,6 +18,10 @@ */ package org.apache.accumulo.manager.tableOps.delete; +import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.LOCATION; +import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.PREV_ROW; +import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.SUSPEND; + import java.io.IOException; import java.net.UnknownHostException; import java.util.Arrays; @@ -27,17 +31,14 @@ import org.apache.accumulo.core.client.AccumuloClient; import org.apache.accumulo.core.client.BatchScanner; import org.apache.accumulo.core.client.IteratorSetting; -import org.apache.accumulo.core.client.Scanner; import org.apache.accumulo.core.clientImpl.thrift.ThriftSecurityException; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.NamespaceId; import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.data.TableId; import org.apache.accumulo.core.data.Value; -import org.apache.accumulo.core.dataImpl.KeyExtent; import org.apache.accumulo.core.fate.Repo; import org.apache.accumulo.core.iterators.user.GrepIterator; -import org.apache.accumulo.core.manager.state.TabletManagement; import org.apache.accumulo.core.metadata.MetadataTable; import org.apache.accumulo.core.metadata.TServerInstance; import org.apache.accumulo.core.metadata.TabletState; @@ -49,7 +50,6 @@ import org.apache.accumulo.manager.tableOps.ManagerRepo; import org.apache.accumulo.manager.tableOps.Utils; import org.apache.accumulo.server.fs.VolumeManager; -import org.apache.accumulo.server.manager.state.TabletManagementIterator; import org.apache.accumulo.server.problems.ProblemReports; import org.apache.accumulo.server.util.MetadataTableUtil; import org.apache.hadoop.fs.Path; @@ -92,23 +92,20 @@ public long isReady(long tid, Manager manager) throws Exception { } boolean done = true; - Range tableRange = new KeyExtent(tableId, null, null).toMetaRange(); - Scanner scanner = manager.getContext().createScanner(MetadataTable.NAME, Authorizations.EMPTY); - TabletManagementIterator.configureScanner(scanner, manager); - scanner.setRange(tableRange); - - for (Entry entry : scanner) { - final TabletManagement mti = TabletManagementIterator.decode(entry); - final TabletMetadata tm = mti.getTabletMetadata(); + + try (var tablets = manager.getContext().getAmple().readTablets().forTable(tableId) + .fetch(LOCATION, PREV_ROW, SUSPEND).checkConsistency().build()) { Set liveTServers = manager.onlineTabletServers(); - TabletState state = TabletState.compute(tm, liveTServers); - if (!state.equals(TabletState.UNASSIGNED)) { - // This code will even wait on tablets that are assigned to dead tablets servers. This is - // intentional because the manager may make metadata writes for these tablets. See #587 - log.debug("Still waiting for table({}) to be deleted; Target tablet state: UNASSIGNED, " - + "Current tablet state: {}, locationState: {}", tableId, state, tm); - done = false; - break; + for (TabletMetadata tm : tablets) { + TabletState state = TabletState.compute(tm, liveTServers); + if (!state.equals(TabletState.UNASSIGNED)) { + // This code will even wait on tablets that are assigned to dead tablets servers. This is + // intentional because the manager may make metadata writes for these tablets. See #587 + log.debug("Still waiting for table({}) to be deleted; Target tablet state: UNASSIGNED, " + + "Current tablet state: {}, locationState: {}", tableId, state, tm); + done = false; + break; + } } } diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/merge/DeleteRows.java b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/merge/DeleteRows.java index 6413e83c145..0cc31fcaa90 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/merge/DeleteRows.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/merge/DeleteRows.java @@ -99,7 +99,6 @@ private Optional deleteTabletFiles(Manager manager, long tid) { for (var tabletMetadata : tabletsMetadata) { validateTablet(tabletMetadata, fateStr, opid, data.tableId); - var tabletMutator = tabletsMutator.mutateTablet(tabletMetadata.getExtent()) .requireOperation(opid).requireAbsentLocation(); diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/UpgradeCoordinator.java b/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/UpgradeCoordinator.java index 32ce0c29341..dedda83872b 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/UpgradeCoordinator.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/UpgradeCoordinator.java @@ -32,9 +32,9 @@ import org.apache.accumulo.core.Constants; import org.apache.accumulo.core.client.AccumuloException; -import org.apache.accumulo.core.dataImpl.KeyExtent; import org.apache.accumulo.core.fate.ReadOnlyTStore; import org.apache.accumulo.core.fate.ZooStore; +import org.apache.accumulo.core.metadata.schema.Ample; import org.apache.accumulo.core.util.threads.ThreadPools; import org.apache.accumulo.core.volume.Volume; import org.apache.accumulo.manager.EventCoordinator; @@ -59,7 +59,7 @@ public enum UpgradeStatus { */ INITIAL { @Override - public boolean isParentLevelUpgraded(KeyExtent extent) { + public boolean isParentLevelUpgraded(Ample.DataLevel level) { return false; } }, @@ -68,8 +68,8 @@ public boolean isParentLevelUpgraded(KeyExtent extent) { */ UPGRADED_ZOOKEEPER { @Override - public boolean isParentLevelUpgraded(KeyExtent extent) { - return extent.isRootTablet(); + public boolean isParentLevelUpgraded(Ample.DataLevel level) { + return level == Ample.DataLevel.ROOT; } }, /** @@ -77,8 +77,8 @@ public boolean isParentLevelUpgraded(KeyExtent extent) { */ UPGRADED_ROOT { @Override - public boolean isParentLevelUpgraded(KeyExtent extent) { - return extent.isMeta(); + public boolean isParentLevelUpgraded(Ample.DataLevel level) { + return level == Ample.DataLevel.METADATA || level == Ample.DataLevel.ROOT; } }, /** @@ -86,7 +86,7 @@ public boolean isParentLevelUpgraded(KeyExtent extent) { */ COMPLETE { @Override - public boolean isParentLevelUpgraded(KeyExtent extent) { + public boolean isParentLevelUpgraded(Ample.DataLevel level) { return true; } }, @@ -95,7 +95,7 @@ public boolean isParentLevelUpgraded(KeyExtent extent) { */ FAILED { @Override - public boolean isParentLevelUpgraded(KeyExtent extent) { + public boolean isParentLevelUpgraded(Ample.DataLevel level) { return false; } }; @@ -104,7 +104,7 @@ public boolean isParentLevelUpgraded(KeyExtent extent) { * Determines if the place where this extent stores its metadata was upgraded for a given * upgrade status. */ - public abstract boolean isParentLevelUpgraded(KeyExtent extent); + public abstract boolean isParentLevelUpgraded(Ample.DataLevel level); } private static final Logger log = LoggerFactory.getLogger(UpgradeCoordinator.class); diff --git a/server/monitor/pom.xml b/server/monitor/pom.xml index c52fe1fb624..49534f0d1e4 100644 --- a/server/monitor/pom.xml +++ b/server/monitor/pom.xml @@ -80,10 +80,6 @@ org.apache.accumulo accumulo-start - - org.apache.hadoop - hadoop-client-api - org.apache.logging.log4j log4j-api diff --git a/server/monitor/src/main/java/org/apache/accumulo/monitor/rest/tables/TablesResource.java b/server/monitor/src/main/java/org/apache/accumulo/monitor/rest/tables/TablesResource.java index 39a6b51cf7b..ccc5eb7ca55 100644 --- a/server/monitor/src/main/java/org/apache/accumulo/monitor/rest/tables/TablesResource.java +++ b/server/monitor/src/main/java/org/apache/accumulo/monitor/rest/tables/TablesResource.java @@ -36,23 +36,20 @@ import jakarta.ws.rs.Produces; import jakarta.ws.rs.core.MediaType; -import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.data.TableId; import org.apache.accumulo.core.manager.state.tables.TableState; import org.apache.accumulo.core.manager.thrift.ManagerMonitorInfo; import org.apache.accumulo.core.manager.thrift.TableInfo; import org.apache.accumulo.core.manager.thrift.TabletServerStatus; -import org.apache.accumulo.core.metadata.MetadataTable; import org.apache.accumulo.core.metadata.RootTable; -import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection; +import org.apache.accumulo.core.metadata.schema.Ample; import org.apache.accumulo.core.metadata.schema.TabletMetadata; +import org.apache.accumulo.core.metadata.schema.TabletsMetadata; import org.apache.accumulo.monitor.Monitor; import org.apache.accumulo.monitor.rest.tservers.TabletServer; import org.apache.accumulo.monitor.rest.tservers.TabletServers; -import org.apache.accumulo.server.manager.state.TabletManagementScanner; import org.apache.accumulo.server.tables.TableManager; import org.apache.accumulo.server.util.TableInfoUtil; -import org.apache.hadoop.io.Text; /** * Generates a tables list from the Monitor as a JSON object @@ -145,25 +142,20 @@ public TabletServers getParticipatingTabletServers(@PathParam("tableId") @NotNul if (RootTable.ID.equals(tableId)) { locs.add(rootTabletLocation); } else { - String systemTableName = - MetadataTable.ID.equals(tableId) ? RootTable.NAME : MetadataTable.NAME; - TabletManagementScanner scanner = new TabletManagementScanner(monitor.getContext(), - new Range(TabletsSection.encodeRow(tableId, new Text()), - TabletsSection.encodeRow(tableId, null)), - systemTableName); - - while (scanner.hasNext()) { - final TabletMetadata tm = scanner.next().getTabletMetadata(); - if (tm.hasCurrent()) { - try { - locs.add(tm.getLocation().getHostPort()); - } catch (Exception ex) { - scanner.close(); - return tabletServers; + var level = Ample.DataLevel.of(tableId); + try (TabletsMetadata tablets = + monitor.getContext().getAmple().readTablets().forLevel(level).build()) { + + for (TabletMetadata tm : tablets) { + if (tm.hasCurrent()) { + try { + locs.add(tm.getLocation().getHostPort()); + } catch (Exception ex) { + return tabletServers; + } } } } - scanner.close(); } List tservers = new ArrayList<>(); diff --git a/test/src/main/java/org/apache/accumulo/test/LocatorIT.java b/test/src/main/java/org/apache/accumulo/test/LocatorIT.java index fa301cb5ae4..3bad4dbd45d 100644 --- a/test/src/main/java/org/apache/accumulo/test/LocatorIT.java +++ b/test/src/main/java/org/apache/accumulo/test/LocatorIT.java @@ -52,10 +52,8 @@ import org.apache.accumulo.test.functional.ManagerAssignmentIT; import org.apache.accumulo.test.util.Wait; import org.apache.hadoop.io.Text; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; -@Disabled // ELASTICITY_TODO public class LocatorIT extends AccumuloClusterHarness { @Override @@ -129,10 +127,8 @@ public void testBasic() throws Exception { ranges.clear(); tableOps.setTabletHostingGoal(tableName, new Range(), TabletHostingGoal.ALWAYS); - Wait.waitFor( - () -> alwaysHostedAndCurrentNotNull.test( - ManagerAssignmentIT.getManagerTabletInfo(client, tableId, null).getTabletMetadata()), - 60000, 250); + Wait.waitFor(() -> alwaysHostedAndCurrentNotNull + .test(ManagerAssignmentIT.getTabletMetadata(client, tableId, null)), 60000, 250); ranges.add(r1); Locations ret = tableOps.locate(tableName, ranges); @@ -147,13 +143,8 @@ public void testBasic() throws Exception { splits.add(new Text("r")); tableOps.addSplits(tableName, splits); - // ELASTICITY_TODO split does not set hosting goal, so this throws exception - assertThrows(AccumuloException.class, () -> tableOps.locate(tableName, ranges)); - tableOps.setTabletHostingGoal(tableName, new Range(), TabletHostingGoal.ALWAYS); - Wait.waitFor( - () -> alwaysHostedAndCurrentNotNull.test( - ManagerAssignmentIT.getManagerTabletInfo(client, tableId, null).getTabletMetadata()), - 60000, 250); + Wait.waitFor(() -> alwaysHostedAndCurrentNotNull + .test(ManagerAssignmentIT.getTabletMetadata(client, tableId, null)), 60000, 250); ret = tableOps.locate(tableName, ranges); assertContains(ret, tservers, Map.of(r1, Set.of(t2), r2, Set.of(t2, t3)), @@ -165,15 +156,9 @@ public void testBasic() throws Exception { tableOps.online(tableName, true); - // ELASTICITY_TODO Split does not set hosting goal - tableOps.setTabletHostingGoal(tableName, new Range(), TabletHostingGoal.ALWAYS); - - // TabletGroupWatcher interval set to 5s - Thread.sleep(7000); - Wait.waitFor( - () -> alwaysHostedAndCurrentNotNull.test(ManagerAssignmentIT - .getManagerTabletInfo(client, tableId, new Text("r")).getTabletMetadata()), + () -> alwaysHostedAndCurrentNotNull + .test(ManagerAssignmentIT.getTabletMetadata(client, tableId, new Text("r"))), 60000, 250); ArrayList ranges2 = new ArrayList<>(); diff --git a/test/src/main/java/org/apache/accumulo/test/ManagerRepairsDualAssignmentIT.java b/test/src/main/java/org/apache/accumulo/test/ManagerRepairsDualAssignmentIT.java index c4094cfe08a..502921e9751 100644 --- a/test/src/main/java/org/apache/accumulo/test/ManagerRepairsDualAssignmentIT.java +++ b/test/src/main/java/org/apache/accumulo/test/ManagerRepairsDualAssignmentIT.java @@ -26,15 +26,14 @@ import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; +import java.util.concurrent.atomic.AtomicBoolean; import org.apache.accumulo.core.client.Accumulo; import org.apache.accumulo.core.client.AccumuloClient; import org.apache.accumulo.core.client.admin.NewTableConfiguration; import org.apache.accumulo.core.client.admin.TabletHostingGoal; -import org.apache.accumulo.core.clientImpl.ClientContext; import org.apache.accumulo.core.conf.Property; import org.apache.accumulo.core.dataImpl.KeyExtent; -import org.apache.accumulo.core.manager.state.TabletManagement; import org.apache.accumulo.core.metadata.MetadataTable; import org.apache.accumulo.core.metadata.RootTable; import org.apache.accumulo.core.metadata.schema.Ample.DataLevel; @@ -45,8 +44,6 @@ import org.apache.accumulo.minicluster.ServerType; import org.apache.accumulo.miniclusterImpl.MiniAccumuloConfigImpl; import org.apache.accumulo.server.ServerContext; -import org.apache.accumulo.server.manager.state.ClosableIterator; -import org.apache.accumulo.server.manager.state.TabletStateStore; import org.apache.accumulo.test.functional.ConfigurableMacBase; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.RawLocalFileSystem; @@ -74,7 +71,6 @@ public void configure(MiniAccumuloConfigImpl cfg, Configuration hadoopCoreSite) public void test() throws Exception { // make some tablets, spread 'em around try (AccumuloClient c = Accumulo.newClient().from(getClientProperties()).build()) { - ClientContext context = (ClientContext) c; ServerContext serverContext = cluster.getServerContext(); String table = this.getUniqueNames(1)[0]; c.securityOperations().grantTablePermission("root", MetadataTable.NAME, @@ -90,16 +86,19 @@ public void test() throws Exception { // scan the metadata table and get the two table location states Set states = new HashSet<>(); Set oldLocations = new HashSet<>(); - TabletStateStore store = TabletStateStore.getStoreForLevel(DataLevel.USER, serverContext); while (states.size() < 2) { UtilWaitThread.sleep(250); oldLocations.clear(); - for (TabletManagement mti : store) { - if (mti.getTabletMetadata().hasCurrent()) { - states.add(mti.getTabletMetadata().getLocation()); - oldLocations.add(mti.getTabletMetadata()); - } + try ( + var tablets = serverContext.getAmple().readTablets().forLevel(DataLevel.USER).build()) { + tablets.iterator().forEachRemaining(tm -> { + if (tm.hasCurrent()) { + states.add(tm.getLocation()); + oldLocations.add(tm); + } + }); } + } assertEquals(2, states.size()); // Kill a tablet server... we don't care which one... wait for everything to be reassigned @@ -109,16 +108,19 @@ public void test() throws Exception { while (true) { UtilWaitThread.sleep(1000); states.clear(); - boolean allAssigned = true; - for (TabletManagement mti : store) { - if (mti.getTabletMetadata().hasCurrent()) { - states.add(mti.getTabletMetadata().getLocation()); - } else { - allAssigned = false; - } + AtomicBoolean allAssigned = new AtomicBoolean(true); + try ( + var tablets = serverContext.getAmple().readTablets().forLevel(DataLevel.USER).build()) { + tablets.iterator().forEachRemaining(tm -> { + if (tm.hasCurrent()) { + states.add(tm.getLocation()); + } else { + allAssigned.set(false); + } + }); } System.out.println(states + " size " + states.size() + " allAssigned " + allAssigned); - if (states.size() != 2 && allAssigned) { + if (states.size() != 2 && allAssigned.get()) { break; } } @@ -136,20 +138,20 @@ public void test() throws Exception { tabletMutator.putLocation(moved.getLocation()); tabletMutator.mutate(); // wait for the manager to fix the problem - waitForCleanStore(store); + waitForCleanStore(serverContext, DataLevel.USER); // now jam up the metadata table tabletMutator = serverContext.getAmple().mutateTablet(new KeyExtent(MetadataTable.ID, null, null)); tabletMutator.putLocation(moved.getLocation()); tabletMutator.mutate(); - waitForCleanStore(TabletStateStore.getStoreForLevel(DataLevel.METADATA, serverContext)); + waitForCleanStore(serverContext, DataLevel.METADATA); } } - private void waitForCleanStore(TabletStateStore store) { + private void waitForCleanStore(ServerContext serverContext, DataLevel level) { while (true) { - try (ClosableIterator iter = store.iterator()) { - iter.forEachRemaining(t -> {}); + try (var tablets = serverContext.getAmple().readTablets().forLevel(level).build()) { + tablets.iterator().forEachRemaining(t -> {}); } catch (Exception ex) { System.out.println(ex); UtilWaitThread.sleep(250); diff --git a/test/src/main/java/org/apache/accumulo/test/functional/AssignLocationModeIT.java b/test/src/main/java/org/apache/accumulo/test/functional/AssignLocationModeIT.java index e742cbbde43..92805e572a0 100644 --- a/test/src/main/java/org/apache/accumulo/test/functional/AssignLocationModeIT.java +++ b/test/src/main/java/org/apache/accumulo/test/functional/AssignLocationModeIT.java @@ -66,7 +66,7 @@ public void test() throws Exception { TabletMetadata newTablet; do { UtilWaitThread.sleep(250); - newTablet = ManagerAssignmentIT.getManagerTabletInfo(c, tableId, null).getTabletMetadata(); + newTablet = ManagerAssignmentIT.getTabletMetadata(c, tableId, null); } while (!newTablet.hasCurrent()); // this would be null if the mode was not "assign" assertEquals(newTablet.getLocation().getHostPort(), newTablet.getLast().getHostPort()); @@ -82,24 +82,21 @@ public void test() throws Exception { .get(Property.TSERV_LAST_LOCATION_MODE.getKey())); // last location should not be set yet - TabletMetadata unflushed = - ManagerAssignmentIT.getManagerTabletInfo(c, tableId, null).getTabletMetadata(); + TabletMetadata unflushed = ManagerAssignmentIT.getTabletMetadata(c, tableId, null); assertEquals(newTablet.getLocation().getHostPort(), unflushed.getLocation().getHostPort()); assertEquals(newTablet.getLocation().getHostPort(), unflushed.getLast().getHostPort()); assertTrue(newTablet.hasCurrent()); // take the tablet offline c.tableOperations().offline(tableName, true); - TabletMetadata offline = - ManagerAssignmentIT.getManagerTabletInfo(c, tableId, null).getTabletMetadata(); + TabletMetadata offline = ManagerAssignmentIT.getTabletMetadata(c, tableId, null); assertNull(offline.getLocation()); assertFalse(offline.hasCurrent()); assertEquals(newTablet.getLocation().getHostPort(), offline.getLast().getHostPort()); // put it back online, should have the same last location c.tableOperations().online(tableName, true); - TabletMetadata online = - ManagerAssignmentIT.getManagerTabletInfo(c, tableId, null).getTabletMetadata(); + TabletMetadata online = ManagerAssignmentIT.getTabletMetadata(c, tableId, null); assertTrue(online.hasCurrent()); assertNotNull(online.getLocation()); assertEquals(newTablet.getLast().getHostPort(), online.getLast().getHostPort()); diff --git a/test/src/main/java/org/apache/accumulo/test/functional/CompactLocationModeIT.java b/test/src/main/java/org/apache/accumulo/test/functional/CompactLocationModeIT.java index 471aedf566d..eb672574f07 100644 --- a/test/src/main/java/org/apache/accumulo/test/functional/CompactLocationModeIT.java +++ b/test/src/main/java/org/apache/accumulo/test/functional/CompactLocationModeIT.java @@ -63,7 +63,7 @@ public void test() throws Exception { TabletMetadata newTablet; do { UtilWaitThread.sleep(250); - newTablet = ManagerAssignmentIT.getManagerTabletInfo(c, tableId, null).getTabletMetadata(); + newTablet = ManagerAssignmentIT.getTabletMetadata(c, tableId, null); } while (!newTablet.hasCurrent()); assertNull(newTablet.getLast()); assertNotNull(newTablet.getLocation()); @@ -79,8 +79,7 @@ public void test() throws Exception { .get(Property.TSERV_LAST_LOCATION_MODE.getKey())); // no last location should be set yet - TabletMetadata unflushed = - ManagerAssignmentIT.getManagerTabletInfo(c, tableId, null).getTabletMetadata(); + TabletMetadata unflushed = ManagerAssignmentIT.getTabletMetadata(c, tableId, null); assertEquals(newTablet.getLocation().getHostPort(), unflushed.getLocation().getHostPort()); assertNull(unflushed.getLast()); assertTrue(newTablet.hasCurrent()); @@ -88,23 +87,20 @@ public void test() throws Exception { // This should give it a last location if the mode is being used correctly c.tableOperations().flush(tableName, null, null, true); - TabletMetadata flushed = - ManagerAssignmentIT.getManagerTabletInfo(c, tableId, null).getTabletMetadata(); + TabletMetadata flushed = ManagerAssignmentIT.getTabletMetadata(c, tableId, null); assertEquals(newTablet.getLocation().getHostPort(), flushed.getLocation().getHostPort()); assertEquals(flushed.getLocation().getHostPort(), flushed.getLast().getHostPort()); assertTrue(newTablet.hasCurrent()); // take the tablet offline c.tableOperations().offline(tableName, true); - TabletMetadata offline = - ManagerAssignmentIT.getManagerTabletInfo(c, tableId, null).getTabletMetadata(); + TabletMetadata offline = ManagerAssignmentIT.getTabletMetadata(c, tableId, null); assertNull(offline.getLocation()); assertEquals(flushed.getLocation().getHostPort(), offline.getLast().getHostPort()); // put it back online, should have the same last location c.tableOperations().online(tableName, true); - TabletMetadata online = - ManagerAssignmentIT.getManagerTabletInfo(c, tableId, null).getTabletMetadata(); + TabletMetadata online = ManagerAssignmentIT.getTabletMetadata(c, tableId, null); assertTrue(online.hasCurrent()); assertNotNull(online.getLocation()); assertEquals(offline.getLast().getHostPort(), online.getLast().getHostPort()); diff --git a/test/src/main/java/org/apache/accumulo/test/functional/ManagerAssignmentIT.java b/test/src/main/java/org/apache/accumulo/test/functional/ManagerAssignmentIT.java index 0f82311d7a5..739add9599c 100644 --- a/test/src/main/java/org/apache/accumulo/test/functional/ManagerAssignmentIT.java +++ b/test/src/main/java/org/apache/accumulo/test/functional/ManagerAssignmentIT.java @@ -63,7 +63,6 @@ import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.dataImpl.KeyExtent; import org.apache.accumulo.core.lock.ServiceLock; -import org.apache.accumulo.core.manager.state.TabletManagement; import org.apache.accumulo.core.metadata.MetadataTable; import org.apache.accumulo.core.metadata.RootTable; import org.apache.accumulo.core.metadata.schema.Ample; @@ -79,7 +78,6 @@ import org.apache.accumulo.core.trace.TraceUtil; import org.apache.accumulo.harness.SharedMiniClusterBase; import org.apache.accumulo.minicluster.ServerType; -import org.apache.accumulo.server.manager.state.TabletManagementScanner; import org.apache.accumulo.test.util.Wait; import org.apache.hadoop.io.Text; import org.junit.jupiter.api.BeforeAll; @@ -133,10 +131,8 @@ public void test() throws Exception { // wait for the tablet to exist in the metadata table. The tablet // will not be hosted so the current location will be empty. - Wait.waitFor( - () -> getManagerTabletInfo(c, tableId, null).getTabletMetadata().getExtent() != null, - 10000, 250); - TabletMetadata newTablet = getManagerTabletInfo(c, tableId, null).getTabletMetadata(); + Wait.waitFor(() -> getTabletMetadata(c, tableId, null) != null, 10000, 250); + TabletMetadata newTablet = getTabletMetadata(c, tableId, null); assertNotNull(newTablet.getExtent()); assertFalse(newTablet.hasCurrent()); assertNull(newTablet.getLast()); @@ -152,7 +148,7 @@ public void test() throws Exception { // give it a last location c.tableOperations().flush(tableName, null, null, true); - TabletMetadata flushed = getManagerTabletInfo(c, tableId, null).getTabletMetadata(); + TabletMetadata flushed = getTabletMetadata(c, tableId, null); assertTrue(flushed.hasCurrent()); assertNotNull(flushed.getLocation()); assertEquals(flushed.getLocation().getHostPort(), flushed.getLast().getHostPort()); @@ -161,7 +157,7 @@ public void test() throws Exception { // take the tablet offline c.tableOperations().offline(tableName, true); - TabletMetadata offline = getManagerTabletInfo(c, tableId, null).getTabletMetadata(); + TabletMetadata offline = getTabletMetadata(c, tableId, null); assertFalse(offline.hasCurrent()); assertNull(offline.getLocation()); assertEquals(flushed.getLocation().getHostPort(), offline.getLast().getHostPort()); @@ -169,7 +165,7 @@ public void test() throws Exception { // put it back online c.tableOperations().online(tableName, true); - TabletMetadata online = getManagerTabletInfo(c, tableId, null).getTabletMetadata(); + TabletMetadata online = getTabletMetadata(c, tableId, null); assertTrue(online.hasCurrent()); assertNotNull(online.getLocation()); assertEquals(online.getLocation().getHostPort(), online.getLast().getHostPort()); @@ -181,10 +177,10 @@ public void test() throws Exception { Predicate alwaysHostedOrCurrentNotNull = t -> (t.getHostingGoal() == TabletHostingGoal.ALWAYS && t.hasCurrent()); - Wait.waitFor(() -> alwaysHostedOrCurrentNotNull - .test(getManagerTabletInfo(c, tableId, null).getTabletMetadata()), 60000, 250); + Wait.waitFor(() -> alwaysHostedOrCurrentNotNull.test(getTabletMetadata(c, tableId, null)), + 60000, 250); - final TabletMetadata always = getManagerTabletInfo(c, tableId, null).getTabletMetadata(); + final TabletMetadata always = getTabletMetadata(c, tableId, null); assertTrue(alwaysHostedOrCurrentNotNull.test(always)); assertTrue(always.hasCurrent()); assertEquals(flushed.getLocation().getHostPort(), always.getLast().getHostPort()); @@ -194,10 +190,10 @@ public void test() throws Exception { c.tableOperations().setTabletHostingGoal(tableName, new Range(), TabletHostingGoal.NEVER); Predicate neverHostedOrCurrentNull = t -> (t.getHostingGoal() == TabletHostingGoal.NEVER && !t.hasCurrent()); - Wait.waitFor(() -> neverHostedOrCurrentNull - .test(getManagerTabletInfo(c, tableId, null).getTabletMetadata()), 60000, 250); + Wait.waitFor(() -> neverHostedOrCurrentNull.test(getTabletMetadata(c, tableId, null)), 60000, + 250); - final TabletMetadata never = getManagerTabletInfo(c, tableId, null).getTabletMetadata(); + final TabletMetadata never = getTabletMetadata(c, tableId, null); assertTrue(neverHostedOrCurrentNull.test(never)); assertNull(never.getLocation()); assertEquals(flushed.getLocation().getHostPort(), never.getLast().getHostPort()); @@ -207,10 +203,8 @@ public void test() throws Exception { c.tableOperations().setTabletHostingGoal(tableName, new Range(), TabletHostingGoal.ONDEMAND); Predicate ondemandHosted = t -> t.getHostingGoal() == TabletHostingGoal.ONDEMAND; - Wait.waitFor( - () -> ondemandHosted.test(getManagerTabletInfo(c, tableId, null).getTabletMetadata()), - 60000, 250); - final TabletMetadata ondemand = getManagerTabletInfo(c, tableId, null).getTabletMetadata(); + Wait.waitFor(() -> ondemandHosted.test(getTabletMetadata(c, tableId, null)), 60000, 250); + final TabletMetadata ondemand = getTabletMetadata(c, tableId, null); assertTrue(ondemandHosted.test(ondemand)); assertNull(ondemand.getLocation()); assertEquals(flushed.getLocation().getHostPort(), ondemand.getLast().getHostPort()); @@ -621,11 +615,16 @@ public void testShutdownOnlyTServerWithoutUserTable() throws Exception { } } - public static TabletManagement getManagerTabletInfo(AccumuloClient c, String tableId, - Text endRow) { - try (TabletManagementScanner s = new TabletManagementScanner((ClientContext) c, - new Range(TabletsSection.encodeRow(TableId.of(tableId), endRow)), MetadataTable.NAME)) { - return s.next(); + public static TabletMetadata getTabletMetadata(AccumuloClient c, String tableId, Text endRow) { + var ctx = (ClientContext) c; + try (var tablets = ctx.getAmple().readTablets().forTable(TableId.of(tableId)) + .overlapping(endRow, null).build()) { + var iter = tablets.iterator(); + if (iter.hasNext()) { + return iter.next(); + } } + + return null; } } diff --git a/test/src/main/java/org/apache/accumulo/test/functional/TabletManagementIteratorIT.java b/test/src/main/java/org/apache/accumulo/test/functional/TabletManagementIteratorIT.java index 3b1a688ca8d..f8edad45518 100644 --- a/test/src/main/java/org/apache/accumulo/test/functional/TabletManagementIteratorIT.java +++ b/test/src/main/java/org/apache/accumulo/test/functional/TabletManagementIteratorIT.java @@ -24,7 +24,6 @@ import java.time.Duration; import java.util.ArrayList; import java.util.Collections; -import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -63,6 +62,7 @@ import org.apache.accumulo.core.manager.thrift.ManagerState; import org.apache.accumulo.core.metadata.MetadataTable; import org.apache.accumulo.core.metadata.TServerInstance; +import org.apache.accumulo.core.metadata.schema.Ample; import org.apache.accumulo.core.metadata.schema.MetadataSchema; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.CurrentLocationColumnFamily; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.HostingColumnFamily; @@ -72,8 +72,8 @@ import org.apache.accumulo.core.util.UtilWaitThread; import org.apache.accumulo.harness.AccumuloClusterHarness; import org.apache.accumulo.server.manager.LiveTServerSet; -import org.apache.accumulo.server.manager.state.CurrentState; import org.apache.accumulo.server.manager.state.TabletManagementIterator; +import org.apache.accumulo.server.manager.state.TabletManagementParameters; import org.apache.hadoop.io.Text; import org.junit.jupiter.api.Test; import org.slf4j.Logger; @@ -123,15 +123,15 @@ public void test() throws AccumuloException, AccumuloSecurityException, TableExi // examine a clone of the metadata table, so we can manipulate it copyTable(client, MetadataTable.NAME, metaCopy1); - State state = new State(client); - int tabletsInFlux = findTabletsNeedingAttention(client, metaCopy1, state); + TabletManagementParameters tabletMgmtParams = createParameters(client); + int tabletsInFlux = findTabletsNeedingAttention(client, metaCopy1, tabletMgmtParams); while (tabletsInFlux > 0) { log.debug("Waiting for {} tablets for {}", tabletsInFlux, metaCopy1); UtilWaitThread.sleep(500); copyTable(client, MetadataTable.NAME, metaCopy1); - tabletsInFlux = findTabletsNeedingAttention(client, metaCopy1, state); + tabletsInFlux = findTabletsNeedingAttention(client, metaCopy1, tabletMgmtParams); } - assertEquals(0, findTabletsNeedingAttention(client, metaCopy1, state), + assertEquals(0, findTabletsNeedingAttention(client, metaCopy1, tabletMgmtParams), "No tables should need attention"); // The metadata table stabilized and metaCopy1 contains a copy suitable for testing. Before @@ -143,30 +143,30 @@ public void test() throws AccumuloException, AccumuloSecurityException, TableExi setTabletHostingGoal(client, metaCopy1, t1, TabletHostingGoal.ALWAYS.name()); // t3 is hosted, setting to never will generate a change to unhost tablets setTabletHostingGoal(client, metaCopy1, t3, TabletHostingGoal.NEVER.name()); - state = new State(client); - assertEquals(4, findTabletsNeedingAttention(client, metaCopy1, state), + tabletMgmtParams = createParameters(client); + assertEquals(4, findTabletsNeedingAttention(client, metaCopy1, tabletMgmtParams), "Should have four tablets with hosting goal changes"); // test the assigned case (no location) removeLocation(client, metaCopy1, t3); - assertEquals(2, findTabletsNeedingAttention(client, metaCopy1, state), + assertEquals(2, findTabletsNeedingAttention(client, metaCopy1, tabletMgmtParams), "Should have two tablets without a loc"); // Test setting the operation id on one of the tablets in table t1. Table t1 has two tablets // w/o a location. Only one should need attention because of the operation id. setOperationId(client, metaCopy1, t1); - assertEquals(1, findTabletsNeedingAttention(client, metaCopy1, state), + assertEquals(1, findTabletsNeedingAttention(client, metaCopy1, tabletMgmtParams), "Should have tablets needing attention because of operation id"); // test the cases where the assignment is to a dead tserver reassignLocation(client, metaCopy2, t3); - assertEquals(1, findTabletsNeedingAttention(client, metaCopy2, state), + assertEquals(1, findTabletsNeedingAttention(client, metaCopy2, tabletMgmtParams), "Only 1 of 2 tablets in table t1 should be returned"); // test the bad tablet location state case (inconsistent metadata) - state = new State(client); + tabletMgmtParams = createParameters(client); addDuplicateLocation(client, metaCopy3, t3); - assertEquals(1, findTabletsNeedingAttention(client, metaCopy3, state), + assertEquals(1, findTabletsNeedingAttention(client, metaCopy3, tabletMgmtParams), "Should have 1 tablet that needs a metadata repair"); // clean up @@ -252,13 +252,12 @@ private void removeLocation(AccumuloClient client, String table, String tableNam deleter.close(); } - private int findTabletsNeedingAttention(AccumuloClient client, String table, State state) - throws TableNotFoundException, IOException { + private int findTabletsNeedingAttention(AccumuloClient client, String table, + TabletManagementParameters tabletMgmtParams) throws TableNotFoundException, IOException { int results = 0; List resultList = new ArrayList<>(); try (Scanner scanner = client.createScanner(table, Authorizations.EMPTY)) { - TabletManagementIterator.configureScanner(scanner, state); - log.debug("Current state = {}", state); + TabletManagementIterator.configureScanner(scanner, tabletMgmtParams); scanner.updateScanIteratorOption("tabletChange", "debug", "1"); for (Entry e : scanner) { if (e != null) { @@ -345,71 +344,29 @@ private void dropTables(AccumuloClient client, String... tables) } } - private static class State implements CurrentState { - - final ClientContext context; - - State(AccumuloClient client) { - this.context = (ClientContext) client; - } - - private Set tservers; - private Set onlineTables; - - @Override - public Set onlineTabletServers() { - HashSet tservers = new HashSet<>(); - for (String tserver : context.instanceOperations().getTabletServers()) { - try { - var zPath = ServiceLock.path(ZooUtil.getRoot(context.instanceOperations().getInstanceId()) - + Constants.ZTSERVERS + "/" + tserver); - long sessionId = ServiceLock.getSessionId(context.getZooCache(), zPath); - tservers.add(new TServerInstance(tserver, sessionId)); - } catch (Exception e) { - throw new RuntimeException(e); - } + private static TabletManagementParameters createParameters(AccumuloClient client) { + var context = (ClientContext) client; + Set onlineTables = Sets.filter(context.getTableIdToNameMap().keySet(), + tableId -> context.getTableState(tableId) == TableState.ONLINE); + + HashSet tservers = new HashSet<>(); + for (String tserver : context.instanceOperations().getTabletServers()) { + try { + var zPath = ServiceLock.path(ZooUtil.getRoot(context.instanceOperations().getInstanceId()) + + Constants.ZTSERVERS + "/" + tserver); + long sessionId = ServiceLock.getSessionId(context.getZooCache(), zPath); + tservers.add(new TServerInstance(tserver, sessionId)); + } catch (Exception e) { + throw new RuntimeException(e); } - this.tservers = Collections.unmodifiableSet(tservers); - return tservers; } - @Override - public LiveTServerSet.LiveTServersSnapshot tserversSnapshot() { - return new LiveTServerSet.LiveTServersSnapshot(onlineTabletServers(), new HashMap<>()); - } - - @Override - public Set onlineTables() { - Set onlineTables = context.getTableIdToNameMap().keySet(); - this.onlineTables = - Sets.filter(onlineTables, tableId -> context.getTableState(tableId) == TableState.ONLINE); - return this.onlineTables; - } - - @Override - public Set migrationsSnapshot() { - return Collections.emptySet(); - } - - @Override - public Set shutdownServers() { - return Collections.emptySet(); - } - - @Override - public ManagerState getManagerState() { - return ManagerState.NORMAL; - } - - @Override - public Map> getCompactionHints() { - return Map.of(); - } - - @Override - public String toString() { - return "tservers: " + tservers + " onlineTables: " + onlineTables; - } + return new TabletManagementParameters(ManagerState.NORMAL, + Map.of( + Ample.DataLevel.ROOT, true, Ample.DataLevel.USER, true, Ample.DataLevel.METADATA, true), + onlineTables, + new LiveTServerSet.LiveTServersSnapshot(tservers, + Map.of(Constants.DEFAULT_RESOURCE_GROUP_NAME, tservers)), + Set.of(), Map.of(), Ample.DataLevel.USER, Map.of(), true); } - } diff --git a/test/src/main/java/org/apache/accumulo/test/manager/SuspendedTabletsIT.java b/test/src/main/java/org/apache/accumulo/test/manager/SuspendedTabletsIT.java index cd183ef364c..e8c38a5bc46 100644 --- a/test/src/main/java/org/apache/accumulo/test/manager/SuspendedTabletsIT.java +++ b/test/src/main/java/org/apache/accumulo/test/manager/SuspendedTabletsIT.java @@ -52,11 +52,11 @@ import org.apache.accumulo.core.clientImpl.ClientTabletCache; import org.apache.accumulo.core.conf.ClientProperty; import org.apache.accumulo.core.conf.Property; -import org.apache.accumulo.core.data.Range; +import org.apache.accumulo.core.data.TableId; import org.apache.accumulo.core.dataImpl.KeyExtent; import org.apache.accumulo.core.metadata.MetadataTable; -import org.apache.accumulo.core.metadata.RootTable; import org.apache.accumulo.core.metadata.TServerInstance; +import org.apache.accumulo.core.metadata.schema.Ample; import org.apache.accumulo.core.metadata.schema.TabletMetadata; import org.apache.accumulo.core.metadata.schema.TabletMetadata.LocationType; import org.apache.accumulo.core.rpc.clients.ThriftClientTypes; @@ -65,7 +65,6 @@ import org.apache.accumulo.minicluster.ServerType; import org.apache.accumulo.miniclusterImpl.MiniAccumuloConfigImpl; import org.apache.accumulo.miniclusterImpl.ProcessReference; -import org.apache.accumulo.server.manager.state.TabletManagementScanner; import org.apache.accumulo.test.functional.ConfigurableMacBase; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; @@ -135,11 +134,11 @@ public void setUp() throws Exception { // Wait for the balancer to assign all metadata tablets to the chosen server. ClientContext ctx = (ClientContext) client; - TabletLocations tl = TabletLocations.retrieve(ctx, MetadataTable.NAME, RootTable.NAME); + TabletLocations tl = TabletLocations.retrieve(ctx, MetadataTable.NAME); while (tl.hosted.keySet().size() != 1 || !tl.hosted.containsKey(metadataServer)) { log.info("Metadata tablets are not hosted on the correct server. Waiting for balancer..."); Thread.sleep(1000L); - tl = TabletLocations.retrieve(ctx, MetadataTable.NAME, RootTable.NAME); + tl = TabletLocations.retrieve(ctx, MetadataTable.NAME); } log.info("Metadata tablets are now hosted on {}", metadataServer); } @@ -403,11 +402,6 @@ private static class TabletLocations { public static TabletLocations retrieve(final ClientContext ctx, final String tableName) throws Exception { - return retrieve(ctx, tableName, MetadataTable.NAME); - } - - public static TabletLocations retrieve(final ClientContext ctx, final String tableName, - final String metaName) throws Exception { int sleepTime = 200; int remainingAttempts = 30; @@ -415,7 +409,7 @@ public static TabletLocations retrieve(final ClientContext ctx, final String tab try { FutureTask tlsFuture = new FutureTask<>(() -> { TabletLocations answer = new TabletLocations(); - answer.scan(ctx, tableName, metaName); + answer.scan(ctx, tableName); return answer; }); THREAD_POOL.execute(tlsFuture); @@ -434,12 +428,14 @@ public static TabletLocations retrieve(final ClientContext ctx, final String tab } } - private void scan(ClientContext ctx, String tableName, String metaName) { + private void scan(ClientContext ctx, String tableName) { Map idMap = ctx.tableOperations().tableIdMap(); String tableId = Objects.requireNonNull(idMap.get(tableName)); - try (var scanner = new TabletManagementScanner(ctx, new Range(), metaName)) { + var level = Ample.DataLevel.of(TableId.of(tableId)); + try (var tablets = ctx.getAmple().readTablets().forLevel(level).build()) { + var scanner = tablets.iterator(); while (scanner.hasNext()) { - final TabletMetadata tm = scanner.next().getTabletMetadata(); + final TabletMetadata tm = scanner.next(); final KeyExtent ke = tm.getExtent(); if (!tm.getTableId().canonical().equals(tableId)) { diff --git a/test/src/main/java/org/apache/accumulo/test/performance/NullTserver.java b/test/src/main/java/org/apache/accumulo/test/performance/NullTserver.java index 63175fb26c6..af8f9ddd02e 100644 --- a/test/src/main/java/org/apache/accumulo/test/performance/NullTserver.java +++ b/test/src/main/java/org/apache/accumulo/test/performance/NullTserver.java @@ -53,7 +53,6 @@ import org.apache.accumulo.core.dataImpl.thrift.TSummaryRequest; import org.apache.accumulo.core.dataImpl.thrift.UpdateErrors; import org.apache.accumulo.core.manager.thrift.TabletServerStatus; -import org.apache.accumulo.core.metadata.MetadataTable; import org.apache.accumulo.core.metadata.TServerInstance; import org.apache.accumulo.core.metadata.schema.Ample.DataLevel; import org.apache.accumulo.core.metadata.schema.TabletMetadata; @@ -73,7 +72,6 @@ import org.apache.accumulo.server.ServerContext; import org.apache.accumulo.server.client.ClientServiceHandler; import org.apache.accumulo.server.manager.state.Assignment; -import org.apache.accumulo.server.manager.state.TabletManagementScanner; import org.apache.accumulo.server.manager.state.TabletStateStore; import org.apache.accumulo.server.rpc.TServerUtils; import org.apache.accumulo.server.rpc.ThriftProcessorTypes; @@ -311,12 +309,13 @@ public static void main(String[] args) throws Exception { // read the locations for the table Range tableRange = new KeyExtent(tableId, null, null).toMetaRange(); List assignments = new ArrayList<>(); - try (var s = new TabletManagementScanner(context, tableRange, MetadataTable.NAME)) { + try (var tablets = context.getAmple().readTablets().forLevel(DataLevel.USER).build()) { long randomSessionID = opts.port; TServerInstance instance = new TServerInstance(addr, randomSessionID); + var s = tablets.iterator(); while (s.hasNext()) { - TabletMetadata next = s.next().getTabletMetadata(); + TabletMetadata next = s.next(); assignments.add(new Assignment(next.getExtent(), instance, next.getLast())); } } From 300b39f7344a1adf4793b1a4007c5062d33276db Mon Sep 17 00:00:00 2001 From: Dave Marion Date: Tue, 31 Oct 2023 12:48:41 -0400 Subject: [PATCH 12/12] Fixes WaitForBalanceIT (#3906) It appears that Manager.waitForBalance may return and then the isBalanced method returns false. I think this is due to the fact that the current location for the Tablet is set in the TabletServer AssignmentHandler, which leaves a gap where InstanceOperations.waitForBalance returns but the locations are not yet set. Co-authored-by: Dom G. --- .../org/apache/accumulo/test/WaitForBalanceIT.java | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/test/src/main/java/org/apache/accumulo/test/WaitForBalanceIT.java b/test/src/main/java/org/apache/accumulo/test/WaitForBalanceIT.java index 1c0b33ce378..a528d967c49 100644 --- a/test/src/main/java/org/apache/accumulo/test/WaitForBalanceIT.java +++ b/test/src/main/java/org/apache/accumulo/test/WaitForBalanceIT.java @@ -31,6 +31,8 @@ import org.apache.accumulo.core.client.Accumulo; import org.apache.accumulo.core.client.AccumuloClient; import org.apache.accumulo.core.client.Scanner; +import org.apache.accumulo.core.client.admin.NewTableConfiguration; +import org.apache.accumulo.core.client.admin.TabletHostingGoal; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.metadata.MetadataTable; @@ -40,11 +42,10 @@ import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily; import org.apache.accumulo.core.security.Authorizations; import org.apache.accumulo.test.functional.ConfigurableMacBase; +import org.apache.accumulo.test.util.Wait; import org.apache.hadoop.io.Text; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; -@Disabled // ELASTICITY_TODO public class WaitForBalanceIT extends ConfigurableMacBase { private static final int NUM_SPLITS = 50; @@ -64,7 +65,9 @@ public void test() throws Exception { c.instanceOperations().waitForBalance(); assertTrue(isBalanced(c)); final String tableName = getUniqueNames(1)[0]; - c.tableOperations().create(tableName); + NewTableConfiguration ntc = new NewTableConfiguration(); + ntc.withInitialHostingGoal(TabletHostingGoal.ALWAYS); + c.tableOperations().create(tableName, ntc); c.instanceOperations().waitForBalance(); final SortedSet partitionKeys = new TreeSet<>(); for (int i = 0; i < NUM_SPLITS; i++) { @@ -73,7 +76,7 @@ public void test() throws Exception { c.tableOperations().addSplits(tableName, partitionKeys); assertFalse(isBalanced(c)); c.instanceOperations().waitForBalance(); - assertTrue(isBalanced(c)); + Wait.waitFor(() -> isBalanced(c)); } }