diff --git a/core/src/main/java/org/apache/accumulo/core/gc/Reference.java b/core/src/main/java/org/apache/accumulo/core/gc/Reference.java index cdffbd7eebf..4c67bfd31d2 100644 --- a/core/src/main/java/org/apache/accumulo/core/gc/Reference.java +++ b/core/src/main/java/org/apache/accumulo/core/gc/Reference.java @@ -31,6 +31,11 @@ public interface Reference { */ boolean isDirectory(); + /** + * Only return true if the reference is a scan. + */ + boolean isScan(); + /** * Get the {@link TableId} of the reference. */ @@ -42,6 +47,8 @@ public interface Reference { * {@link org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.DataFileColumnFamily} * A directory will be read from the "srv:dir" column family: * {@link org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily} + * A scan will be read from the Tablet "scan" column family: + * {@link org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ScanFileColumnFamily} */ String getMetadataEntry(); } diff --git a/core/src/main/java/org/apache/accumulo/core/gc/ReferenceDirectory.java b/core/src/main/java/org/apache/accumulo/core/gc/ReferenceDirectory.java index b9a6589d9f5..5491020aa4f 100644 --- a/core/src/main/java/org/apache/accumulo/core/gc/ReferenceDirectory.java +++ b/core/src/main/java/org/apache/accumulo/core/gc/ReferenceDirectory.java @@ -28,7 +28,7 @@ public class ReferenceDirectory extends ReferenceFile { private final String tabletDir; // t-0003 public ReferenceDirectory(TableId tableId, String dirName) { - super(tableId, dirName); + super(tableId, dirName, false); MetadataSchema.TabletsSection.ServerColumnFamily.validateDirCol(dirName); this.tabletDir = dirName; } diff --git a/core/src/main/java/org/apache/accumulo/core/gc/ReferenceFile.java b/core/src/main/java/org/apache/accumulo/core/gc/ReferenceFile.java index 7f796e8de91..b9eece90d58 100644 --- a/core/src/main/java/org/apache/accumulo/core/gc/ReferenceFile.java +++ b/core/src/main/java/org/apache/accumulo/core/gc/ReferenceFile.java @@ -29,13 +29,23 @@ public class ReferenceFile implements Reference, Comparable { // parts of an absolute URI, like "hdfs://1.2.3.4/accumulo/tables/2a/t-0003" public final TableId tableId; // 2a + public final boolean isScan; // the exact string that is stored in the metadata protected final String metadataEntry; - public ReferenceFile(TableId tableId, String metadataEntry) { + protected ReferenceFile(TableId tableId, String metadataEntry, boolean isScan) { this.tableId = Objects.requireNonNull(tableId); this.metadataEntry = Objects.requireNonNull(metadataEntry); + this.isScan = isScan; + } + + public static ReferenceFile forFile(TableId tableId, String metadataEntry) { + return new ReferenceFile(tableId, metadataEntry, false); + } + + public static ReferenceFile forScan(TableId tableId, String metadataEntry) { + return new ReferenceFile(tableId, metadataEntry, true); } @Override @@ -43,6 +53,11 @@ public boolean isDirectory() { return false; } + @Override + public boolean isScan() { + return isScan; + } + @Override public TableId getTableId() { return tableId; diff --git a/server/base/src/main/java/org/apache/accumulo/server/gc/AllVolumesDirectory.java b/server/base/src/main/java/org/apache/accumulo/server/gc/AllVolumesDirectory.java index 2dbc1705f35..aff8dd5d039 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/gc/AllVolumesDirectory.java +++ b/server/base/src/main/java/org/apache/accumulo/server/gc/AllVolumesDirectory.java @@ -32,7 +32,7 @@ public class AllVolumesDirectory extends ReferenceFile { public AllVolumesDirectory(TableId tableId, String dirName) { - super(tableId, getDeleteTabletOnAllVolumesUri(tableId, dirName)); + super(tableId, getDeleteTabletOnAllVolumesUri(tableId, dirName), false); } private static String getDeleteTabletOnAllVolumesUri(TableId tableId, String dirName) { diff --git a/server/base/src/main/java/org/apache/accumulo/server/metadata/ServerAmpleImpl.java b/server/base/src/main/java/org/apache/accumulo/server/metadata/ServerAmpleImpl.java index 65fa86b8146..223a9cf112b 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/metadata/ServerAmpleImpl.java +++ b/server/base/src/main/java/org/apache/accumulo/server/metadata/ServerAmpleImpl.java @@ -216,7 +216,9 @@ public void deleteGcCandidates(DataLevel level, Collection candidat if (level == DataLevel.ROOT) { if (type == GcCandidateType.INUSE) { - // Deletion of INUSE candidates is not supported in 2.1.x. + // Since there is only a single root tablet, supporting INUSE candidate deletions would add + // additional code complexity without any substantial benefit. + // Therefore, deletion of root INUSE candidates is not supported. return; } mutateRootGcCandidates(rgcc -> rgcc.remove(candidates.stream())); diff --git a/server/base/src/main/java/org/apache/accumulo/server/util/ManagerMetadataUtil.java b/server/base/src/main/java/org/apache/accumulo/server/util/ManagerMetadataUtil.java index 6073822e235..5d25027d874 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/util/ManagerMetadataUtil.java +++ b/server/base/src/main/java/org/apache/accumulo/server/util/ManagerMetadataUtil.java @@ -180,6 +180,8 @@ public static void replaceDatafiles(ServerContext context, KeyExtent extent, TServerInstance tServerInstance, Location lastLocation, ServiceLock zooLock, Optional ecid) { + // Write candidates before the mutation to ensure that a process failure after a mutation would + // not affect candidate creation context.getAmple().putGcCandidates(extent.tableId(), datafilesToDelete); TabletMutator tablet = context.getAmple().mutateTablet(extent); @@ -204,6 +206,8 @@ public static void replaceDatafiles(ServerContext context, KeyExtent extent, tablet.putZooLock(zooLock); tablet.mutate(); + // Write candidates again to avoid a possible race condition when removing InUse candidates + context.getAmple().putGcCandidates(extent.tableId(), datafilesToDelete); } /** diff --git a/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java b/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java index 34d0381673f..60b16e55738 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java +++ b/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java @@ -332,8 +332,8 @@ public static void deleteTable(TableId tableId, boolean insertDeletes, ServerCon if (key.getColumnFamily().equals(DataFileColumnFamily.NAME)) { StoredTabletFile stf = new StoredTabletFile(key.getColumnQualifierData().toString()); - bw.addMutation( - ample.createDeleteMutation(new ReferenceFile(tableId, stf.getMetaUpdateDelete()))); + bw.addMutation(ample + .createDeleteMutation(ReferenceFile.forFile(tableId, stf.getMetaUpdateDelete()))); } if (ServerColumnFamily.DIRECTORY_COLUMN.hasColumns(key)) { diff --git a/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java b/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java index 6a1b9e5962c..c03d4496caa 100644 --- a/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java +++ b/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java @@ -189,36 +189,40 @@ public Stream getReferences() { // there is a lot going on in this "one line" so see below for more info var tabletReferences = tabletStream.flatMap(tm -> { + var tableId = tm.getTableId(); // verify that dir and prev row entries present for to check for complete row scan - log.trace("tablet metadata table id: {}, end row:{}, dir:{}, saw: {}, prev row: {}", - tm.getTableId(), tm.getEndRow(), tm.getDirName(), tm.sawPrevEndRow(), tm.getPrevEndRow()); + log.trace("tablet metadata table id: {}, end row:{}, dir:{}, saw: {}, prev row: {}", tableId, + tm.getEndRow(), tm.getDirName(), tm.sawPrevEndRow(), tm.getPrevEndRow()); if (tm.getDirName() == null || tm.getDirName().isEmpty() || !tm.sawPrevEndRow()) { - throw new IllegalStateException("possible incomplete metadata scan for table id: " - + tm.getTableId() + ", end row: " + tm.getEndRow() + ", dir: " + tm.getDirName() - + ", saw prev row: " + tm.sawPrevEndRow()); + throw new IllegalStateException("possible incomplete metadata scan for table id: " + tableId + + ", end row: " + tm.getEndRow() + ", dir: " + tm.getDirName() + ", saw prev row: " + + tm.sawPrevEndRow()); } // combine all the entries read from file and scan columns in the metadata table - Stream fileStream = tm.getFiles().stream(); + Stream stfStream = tm.getFiles().stream(); + // map the files to Reference objects + var fileStream = stfStream.map(f -> ReferenceFile.forFile(tableId, f.getMetaUpdateDelete())); + // scans are normally empty, so only introduce a layer of indirection when needed final var tmScans = tm.getScans(); if (!tmScans.isEmpty()) { - fileStream = Stream.concat(fileStream, tmScans.stream()); + var scanStream = + tmScans.stream().map(s -> ReferenceFile.forScan(tableId, s.getMetaUpdateDelete())); + fileStream = Stream.concat(fileStream, scanStream); } - // map the files to Reference objects - var stream = fileStream.map(f -> new ReferenceFile(tm.getTableId(), f.getMetaUpdateDelete())); - // if dirName is populated then we have a tablet directory aka srv:dir + // if dirName is populated, then we have a tablet directory aka srv:dir if (tm.getDirName() != null) { // add the tablet directory to the stream - var tabletDir = new ReferenceDirectory(tm.getTableId(), tm.getDirName()); - stream = Stream.concat(stream, Stream.of(tabletDir)); + var tabletDir = new ReferenceDirectory(tableId, tm.getDirName()); + fileStream = Stream.concat(fileStream, Stream.of(tabletDir)); } - return stream; + return fileStream; }); var scanServerRefs = context.getAmple().getScanServerFileReferences() - .map(sfr -> new ReferenceFile(sfr.getTableId(), sfr.getPathStr())); + .map(sfr -> ReferenceFile.forScan(sfr.getTableId(), sfr.getPathStr())); return Stream.concat(tabletReferences, scanServerRefs); } diff --git a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java index cc77197a418..6800b9a84da 100644 --- a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java +++ b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java @@ -144,7 +144,7 @@ private SortedMap makeRelative(Collection candi private void removeCandidatesInUse(GarbageCollectionEnvironment gce, SortedMap candidateMap) throws InterruptedException { - List inUseCandidates = new ArrayList<>(); + List candidateEntriesToBeDeleted = new ArrayList<>(); Set tableIdsBefore = gce.getCandidateTableIDs(); Set tableIdsSeen = new HashSet<>(); Iterator iter = gce.getReferences().iterator(); @@ -163,8 +163,7 @@ private void removeCandidatesInUse(GarbageCollectionEnvironment gce, GcCandidate gcTemp = candidateMap.remove(dir); if (gcTemp != null) { log.debug("Directory Candidate was still in use by dir ref: {}", dir); - // Intentionally not adding dir candidates to inUseCandidates as they are only added once. - // If dir candidates are deleted, due to being in use, nothing will add them again. + // Do not add dir candidates to candidateEntriesToBeDeleted as they are only created once. } } else { String reference = ref.getMetadataEntry(); @@ -183,15 +182,18 @@ private void removeCandidatesInUse(GarbageCollectionEnvironment gce, GcCandidate gcTemp = candidateMap.remove(relativePath); if (gcTemp != null) { log.debug("File Candidate was still in use: {}", relativePath); - inUseCandidates.add(gcTemp); + // Prevent deletion of candidates that are still in use by scans, because they won't be + // recreated once the scan is finished. + if (!ref.isScan()) { + candidateEntriesToBeDeleted.add(gcTemp); + } } String dir = relativePath.substring(0, relativePath.lastIndexOf('/')); GcCandidate gcT = candidateMap.remove(dir); if (gcT != null) { log.debug("Directory Candidate was still in use by file ref: {}", relativePath); - // Intentionally not adding dir candidates to inUseCandidates as they are only added once. - // If dir candidates are deleted, due to being in use, nothing will add them again. + // Do not add dir candidates to candidateEntriesToBeDeleted as they are only created once. } } } @@ -199,7 +201,7 @@ private void removeCandidatesInUse(GarbageCollectionEnvironment gce, ensureAllTablesChecked(Collections.unmodifiableSet(tableIdsBefore), Collections.unmodifiableSet(tableIdsSeen), Collections.unmodifiableSet(tableIdsAfter)); if (gce.canRemoveInUseCandidates()) { - gce.deleteGcCandidates(inUseCandidates, GcCandidateType.INUSE); + gce.deleteGcCandidates(candidateEntriesToBeDeleted, GcCandidateType.INUSE); } } diff --git a/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java b/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java index 0536bda699a..8e9a2d1e070 100644 --- a/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java +++ b/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java @@ -60,7 +60,7 @@ static class TestGCE implements GarbageCollectionEnvironment { Map references = new TreeMap<>(); HashSet tableIds = new HashSet<>(); - ArrayList deletes = new ArrayList<>(); + ArrayList fileDeletions = new ArrayList<>(); ArrayList tablesDirsToDelete = new ArrayList<>(); TreeMap filesToReplicate = new TreeMap<>(); boolean deleteInUseRefs = false; @@ -121,6 +121,9 @@ public Stream getReferences() { public void deleteGcCandidates(Collection refCandidates, GcCandidateType type) { // Mimic ServerAmpleImpl behavior for root InUse Candidates if (type.equals(GcCandidateType.INUSE) && this.level.equals(Ample.DataLevel.ROOT)) { + // Since there is only a single root tablet, supporting INUSE candidate deletions would add + // additional code complexity without any substantial benefit. + // Therefore, deletion of root INUSE candidates is not supported. return; } refCandidates.forEach(gcCandidate -> deletedCandidates.put(gcCandidate, type)); @@ -136,7 +139,7 @@ public Map getTableIDs() { @Override public void deleteConfirmedCandidates(SortedMap candidateMap) { - deletes.addAll(candidateMap.values()); + fileDeletions.addAll(candidateMap.values()); this.candidates.removeAll(candidateMap.values()); } @@ -147,7 +150,7 @@ public void deleteTableDirIfEmpty(TableId tableID) { public void addFileReference(String tableId, String endRow, String file) { TableId tid = TableId.of(tableId); - references.put(tableId + ":" + endRow + ":" + file, new ReferenceFile(tid, file)); + references.put(tableId + ":" + endRow + ":" + file, ReferenceFile.forFile(tid, file)); tableIds.add(tid); } @@ -167,6 +170,17 @@ public void removeDirReference(String tableId, String endRow) { removeLastTableIdRef(TableId.of(tableId)); } + public void addScanReference(String tableId, String endRow, String scan) { + TableId tid = TableId.of(tableId); + references.put(tableId + ":" + endRow + ":scan:" + scan, ReferenceFile.forScan(tid, scan)); + tableIds.add(tid); + } + + public void removeScanReference(String tableId, String endRow, String scan) { + references.remove(tableId + ":" + endRow + ":scan:" + scan); + removeLastTableIdRef(TableId.of(tableId)); + } + /* * this is to be called from removeDirReference or removeFileReference. * @@ -216,12 +230,12 @@ public Set getCandidateTableIDs() { } } - private void assertRemoved(TestGCE gce, GcCandidate... candidates) { + private void assertFileDeleted(TestGCE gce, GcCandidate... candidates) { for (GcCandidate candidate : candidates) { - assertTrue(gce.deletes.remove(candidate)); + assertTrue(gce.fileDeletions.remove(candidate)); } - assertEquals(0, gce.deletes.size(), "Deletes not empty: " + gce.deletes); + assertEquals(0, gce.fileDeletions.size(), "Deletes not empty: " + gce.fileDeletions); } private void assertNoCandidatesRemoved(TestGCE gce) { @@ -257,7 +271,7 @@ public void minimalDelete() throws Exception { GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm(); gca.collect(gce); - assertRemoved(gce, candidate); + assertFileDeleted(gce, candidate); } @Test @@ -276,29 +290,29 @@ public void testBasic() throws Exception { GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm(); gca.collect(gce); - assertRemoved(gce); + assertFileDeleted(gce); // Remove the reference to this flush file, run the GC which should not trim it from the // candidates, and assert that it's gone gce.removeFileReference("4", null, "hdfs://foo.com:6000/accumulo/tables/4/t0/F000.rf"); gca.collect(gce); - assertRemoved(gce, candOne); + assertFileDeleted(gce, candOne); // Removing a reference to a file that wasn't in the candidates should do nothing gce.removeFileReference("4", null, "hdfs://foo.com:6000/accumulo/tables/4/t0/F002.rf"); gca.collect(gce); - assertRemoved(gce); + assertFileDeleted(gce); // Remove the reference to a file in the candidates should cause it to be removed gce.removeFileReference("4", null, "hdfs://foo:6000/accumulo/tables/4/t0/F001.rf"); gca.collect(gce); - assertRemoved(gce, candTwo); + assertFileDeleted(gce, candTwo); // Adding more candidates which do not have references should be removed var candThree = gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F003.rf"); var candFour = gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F004.rf"); gca.collect(gce); - assertRemoved(gce, candThree, candFour); + assertFileDeleted(gce, candThree, candFour); } @@ -353,29 +367,29 @@ public void testBasic2() throws Exception { GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm(); gca.collect(gce); - assertRemoved(gce, toBeRemoved); + assertFileDeleted(gce, toBeRemoved); // Remove the reference to this flush file, run the GC which should not trim it from the // candidates, and assert that it's gone gce.removeFileReference("4", null, "hdfs://foo.com:6000/accumulo/tables/4/t0/F000.rf"); gca.collect(gce); - assertRemoved(gce, candOne); + assertFileDeleted(gce, candOne); // Removing a reference to a file that wasn't in the candidates should do nothing gce.removeFileReference("4", null, "hdfs://foo.com:6000/accumulo/tables/4/t0/F002.rf"); gca.collect(gce); - assertRemoved(gce); + assertFileDeleted(gce); // Remove the reference to a file in the candidates should cause it to be removed gce.removeFileReference("4", null, "hdfs://foo:6000/accumulo/tables/4/t0/F001.rf"); gca.collect(gce); - assertRemoved(gce, candTwo); + assertFileDeleted(gce, candTwo); // Adding more candidates which do no have references should be removed var candThree = gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F003.rf"); var candFour = gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F004.rf"); gca.collect(gce); - assertRemoved(gce, candThree, candFour); + assertFileDeleted(gce, candThree, candFour); } /** @@ -397,7 +411,7 @@ public void emptyPathsTest() throws Exception { GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm(); gca.collect(gce); - assertRemoved(gce, candidate); + assertFileDeleted(gce, candidate); } @Test @@ -418,7 +432,7 @@ public void testRelative() throws Exception { // All candidates currently have references gca.collect(gce); - assertRemoved(gce); + assertFileDeleted(gce); List refsToRemove = new ArrayList<>(); refsToRemove.add(new String[] {"4", "/t0/F000.rf"}); @@ -430,28 +444,28 @@ public void testRelative() throws Exception { for (int i = 0; i < 2; i++) { gce.removeFileReference(refsToRemove.get(i)[0], null, refsToRemove.get(i)[1]); gca.collect(gce); - assertRemoved(gce); + assertFileDeleted(gce); } gce.removeFileReference(refsToRemove.get(2)[0], null, refsToRemove.get(2)[1]); gca.collect(gce); - assertRemoved(gce, candOne); + assertFileDeleted(gce, candOne); gce.removeFileReference("4", null, "/t0/F001.rf"); gca.collect(gce); - assertRemoved(gce, candThree); + assertFileDeleted(gce, candThree); // add absolute candidate for file that already has a relative candidate var candFour = gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F002.rf"); gca.collect(gce); - assertRemoved(gce); + assertFileDeleted(gce); gce.removeFileReference("4", null, "/t0/F002.rf"); gca.collect(gce); - assertRemoved(gce, candFour); + assertFileDeleted(gce, candFour); gca.collect(gce); - assertRemoved(gce, candTwo); + assertFileDeleted(gce, candTwo); } @Test @@ -472,7 +486,7 @@ public void testBlip() throws Exception { // Nothing should be removed because all candidates exist within a blip gca.collect(gce); - assertRemoved(gce); + assertFileDeleted(gce); // Remove the first blip gce.blips.remove("/4/b-0"); @@ -480,18 +494,18 @@ public void testBlip() throws Exception { // And we should lose all files in that blip and the blip directory itself -- relative and // absolute gca.collect(gce); - assertRemoved(gce, new GcCandidate("/4/b-0", 0L), new GcCandidate("/4/b-0/F002.rf", 1L), + assertFileDeleted(gce, new GcCandidate("/4/b-0", 0L), new GcCandidate("/4/b-0/F002.rf", 1L), new GcCandidate("hdfs://foo.com:6000/accumulo/tables/4/b-0/F001.rf", 2L)); gce.blips.remove("hdfs://foo.com:6000/accumulo/tables/5/b-0"); // Same as above, we should lose relative and absolute for a relative or absolute blip gca.collect(gce); - assertRemoved(gce, new GcCandidate("/5/b-0", 3L), new GcCandidate("/5/b-0/F002.rf", 4L), + assertFileDeleted(gce, new GcCandidate("/5/b-0", 3L), new GcCandidate("/5/b-0/F002.rf", 4L), new GcCandidate("hdfs://foo.com:6000/accumulo/tables/5/b-0/F001.rf", 5L)); gca.collect(gce); - assertRemoved(gce); + assertFileDeleted(gce); } @Test @@ -528,21 +542,21 @@ public void testDirectories() throws Exception { // A directory reference does not preclude a candidate file beneath that directory from deletion gca.collect(gce); - assertRemoved(gce, new GcCandidate("/4/t-0/F002.rf", 1L)); + assertFileDeleted(gce, new GcCandidate("/4/t-0/F002.rf", 1L)); // Removing the dir reference for a table will delete all tablet directories gce.removeDirReference("5", null); gca.collect(gce); - assertRemoved(gce, new GcCandidate("hdfs://foo.com:6000/accumulo/tables/5/t-0", 2L)); + assertFileDeleted(gce, new GcCandidate("hdfs://foo.com:6000/accumulo/tables/5/t-0", 2L)); gce.removeDirReference("4", null); gca.collect(gce); - assertRemoved(gce, new GcCandidate("/4/t-0", 0L)); + assertFileDeleted(gce, new GcCandidate("/4/t-0", 0L)); gce.removeDirReference("6", null); gce.removeDirReference("7", null); gca.collect(gce); - assertRemoved(gce, new GcCandidate("/6/t-0", 3L), + assertFileDeleted(gce, new GcCandidate("/6/t-0", 3L), new GcCandidate("hdfs://foo:6000/accumulo/tables/7/t-0/", 4L)); gce.removeFileReference("8", "m", "/t-0/F00.rf"); @@ -552,13 +566,13 @@ public void testDirectories() throws Exception { gce.removeFileReference("e", "m", "../c/t-0/F00.rf"); gce.removeFileReference("f", "m", "../d/t-0/F00.rf"); gca.collect(gce); - assertRemoved(gce, new GcCandidate("/8/t-0", 5L), + assertFileDeleted(gce, new GcCandidate("/8/t-0", 5L), new GcCandidate("hdfs://foo:6000/accumulo/tables/9/t-0", 6L), new GcCandidate("/a/t-0", 7L), new GcCandidate("hdfs://foo:6000/accumulo/tables/b/t-0", 8L), new GcCandidate("/c/t-0", 9L), new GcCandidate("hdfs://foo:6000/accumulo/tables/d/t-0", 10L)); gca.collect(gce); - assertRemoved(gce); + assertFileDeleted(gce); } @Test @@ -596,23 +610,23 @@ public void testCustomDirectories() throws Exception { // A directory reference does not preclude a candidate file beneath that directory from deletion gca.collect(gce); - assertRemoved(gce, candidates.get(2)); + assertFileDeleted(gce, candidates.get(2)); // Removing the dir reference for a table will delete all tablet directories gce.removeDirReference("5", null); // but we need to add a file ref gce.addFileReference("8", "m", "/t-0/F00.rf"); gca.collect(gce); - assertRemoved(gce, candidates.get(3)); + assertFileDeleted(gce, candidates.get(3)); gce.removeDirReference("4", null); gca.collect(gce); - assertRemoved(gce, candidates.get(1)); + assertFileDeleted(gce, candidates.get(1)); gce.removeDirReference("6", null); gce.removeDirReference("7", null); gca.collect(gce); - assertRemoved(gce, candidates.get(4), candidates.get(5)); + assertFileDeleted(gce, candidates.get(4), candidates.get(5)); gce.removeFileReference("8", "m", "/t-0/F00.rf"); gce.removeFileReference("9", "m", "/t-0/F00.rf"); @@ -621,11 +635,11 @@ public void testCustomDirectories() throws Exception { gce.removeFileReference("e", "m", "../c/t-0/F00.rf"); gce.removeFileReference("f", "m", "../d/t-0/F00.rf"); gca.collect(gce); - assertRemoved(gce, candidates.get(6), candidates.get(7), candidates.get(8), candidates.get(9), - candidates.get(10), candidates.get(11)); + assertFileDeleted(gce, candidates.get(6), candidates.get(7), candidates.get(8), + candidates.get(9), candidates.get(10), candidates.get(11)); gca.collect(gce); - assertRemoved(gce); + assertFileDeleted(gce); } private void badRefTest(String ref) { @@ -696,8 +710,8 @@ public void testBadDeletes() throws Exception { gce.addCandidate("hdfs://foo.com:6000/user/foo/tables/a/t-0/t-1/F00.rf"); gca.collect(gce); - System.out.println(gce.deletes); - assertRemoved(gce); + System.out.println(gce.fileDeletions); + assertFileDeleted(gce); } @Test @@ -709,17 +723,17 @@ public void test() throws Exception { gce.addCandidate("/1636/default_tablet"); gce.addDirReference("1636", null, "default_tablet"); gca.collect(gce); - assertRemoved(gce); + assertFileDeleted(gce); gce.candidates.clear(); var tempCandidate = gce.addCandidate("/1636/default_tablet/someFile"); gca.collect(gce); - assertRemoved(gce, tempCandidate); + assertFileDeleted(gce, tempCandidate); gce.addFileReference("1636", null, "/default_tablet/someFile"); gce.addCandidate("/1636/default_tablet/someFile"); gca.collect(gce); - assertRemoved(gce); + assertFileDeleted(gce); // have an indirect file reference gce = new TestGCE(); @@ -728,19 +742,19 @@ public void test() throws Exception { gce.addDirReference("1636", null, "default_tablet"); gce.addCandidate("/9/default_tablet/someFile"); gca.collect(gce); - assertRemoved(gce); + assertFileDeleted(gce); // have an indirect file reference and a directory candidate gce.candidates.clear(); gce.addCandidate("/9/default_tablet"); gca.collect(gce); - assertRemoved(gce); + assertFileDeleted(gce); gce.candidates.clear(); gce.addCandidate("/9/default_tablet"); gce.addCandidate("/9/default_tablet/someFile"); long blipCount = gca.collect(gce); - assertRemoved(gce); + assertFileDeleted(gce); assertEquals(0, blipCount); gce = new TestGCE(); @@ -748,7 +762,7 @@ public void test() throws Exception { gce.blips.add("/1636/b-0001"); gce.addCandidate("/1636/b-0001/I0000"); blipCount = gca.collect(gce); - assertRemoved(gce); + assertFileDeleted(gce); assertEquals(1, blipCount); gce = new TestGCE(); @@ -762,7 +776,7 @@ public void test() throws Exception { gce.addCandidate("/1000/b-1002/I0007"); var candidate = gce.addCandidate("/1000/t-0003/I0008"); blipCount = gca.collect(gce); - assertRemoved(gce, candidate); + assertFileDeleted(gce, candidate); assertEquals(5, blipCount); } @@ -810,7 +824,7 @@ public void finishedReplicationRecordsDontPreventDeletion() throws Exception { // No refs to A000002.rf, and a closed, finished repl for A000001.rf should not preclude // it from being deleted - assertEquals(2, gce.deletes.size()); + assertEquals(2, gce.fileDeletions.size()); } @Test @@ -829,8 +843,8 @@ public void openReplicationRecordsPreventDeletion() throws Exception { gca.collect(gce); // We need to replicate that one file still, should not delete it. - assertEquals(1, gce.deletes.size()); - assertEquals(candidate, gce.deletes.get(0)); + assertEquals(1, gce.fileDeletions.size()); + assertEquals(candidate, gce.fileDeletions.get(0)); } @Test @@ -851,8 +865,8 @@ public void newReplicationRecordsPreventDeletion() throws Exception { gca.collect(gce); // We need to replicate that one file still, should not delete it. - assertEquals(1, gce.deletes.size()); - assertEquals(candidate, gce.deletes.get(0)); + assertEquals(1, gce.fileDeletions.size()); + assertEquals(candidate, gce.fileDeletions.get(0)); } @Test @@ -861,7 +875,7 @@ public void bulkImportReplicationRecordsPreventDeletion() throws Exception { TestGCE gce = new TestGCE(); - assertEquals(0, gce.deletes.size()); + assertEquals(0, gce.fileDeletions.size()); gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/1/t-00001/A000001.rf"); gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/2/t-00002/A000002.rf"); @@ -873,9 +887,9 @@ public void bulkImportReplicationRecordsPreventDeletion() throws Exception { gca.collect(gce); // We need to replicate that one file still, should not delete it. - assertEquals(1, gce.deletes.size()); + assertEquals(1, gce.fileDeletions.size()); assertEquals(new GcCandidate("hdfs://foo.com:6000/accumulo/tables/2/t-00002/A000002.rf", 1L), - gce.deletes.get(0)); + gce.fileDeletions.get(0)); } @Test @@ -915,13 +929,13 @@ public void testDeletingInUseReferenceCandidates() throws Exception { gce.deleteInUseRefs = false; // All candidates currently have references gca.collect(gce); - assertRemoved(gce); + assertFileDeleted(gce); assertNoCandidatesRemoved(gce); // Enable InUseRefs to be removed if the file ref is found. gce.deleteInUseRefs = true; gca.collect(gce); - assertRemoved(gce); + assertFileDeleted(gce); assertCandidateRemoved(gce, GcCandidateType.INUSE, candidate); var cand1 = gce.addCandidate("/9/t0/F003.rf"); @@ -932,7 +946,7 @@ public void testDeletingInUseReferenceCandidates() throws Exception { gca.collect(gce); assertNoCandidatesRemoved(gce); // File references did not exist, so candidates are processed - assertRemoved(gce, cand1, cand2); + assertFileDeleted(gce, cand1, cand2); } @Test @@ -958,14 +972,14 @@ public void testDeletingRootInUseReferenceCandidates() throws Exception { gce.deleteInUseRefs = false; // No InUse Candidates should be removed. gca.collect(gce); - assertRemoved(gce); + assertFileDeleted(gce); assertNoCandidatesRemoved(gce); gce.deleteInUseRefs = true; // Due to the gce Datalevel of ROOT, InUse candidate deletion is not supported regardless of // property setting. gca.collect(gce); - assertRemoved(gce); + assertFileDeleted(gce); assertNoCandidatesRemoved(gce); gce.removeFileReference("+r", null, "/t0/F000.rf"); @@ -975,7 +989,7 @@ public void testDeletingRootInUseReferenceCandidates() throws Exception { // With file references deleted, the GC should now process the candidates gca.collect(gce); - assertRemoved(gce, toBeRemoved); + assertFileDeleted(gce, toBeRemoved); assertNoCandidatesRemoved(gce); } @@ -993,7 +1007,7 @@ public void testInUseDirReferenceCandidates() throws Exception { gce.addDirReference("6", null, "t-0"); gca.collect(gce); - assertRemoved(gce, candTwo); + assertFileDeleted(gce, candTwo); assertNoCandidatesRemoved(gce); assertEquals(1, gce.candidates.size()); @@ -1001,7 +1015,7 @@ public void testInUseDirReferenceCandidates() throws Exception { gce.removeDirReference("6", null); gca.collect(gce); - assertRemoved(gce, candOne); + assertFileDeleted(gce, candOne); assertNoCandidatesRemoved(gce); assertEquals(0, gce.candidates.size()); @@ -1019,12 +1033,38 @@ public void testInUseDirReferenceCandidates() throws Exception { gca.collect(gce); assertCandidateRemoved(gce, GcCandidateType.INUSE, removedCandidate); - assertRemoved(gce); + assertFileDeleted(gce); // Check and make sure the InUse directory candidates are not removed. assertEquals(1, gce.candidates.size()); assertTrue(gce.candidates.contains(candidate)); } + @Test + public void testInUseScanReferenceCandidates() throws Exception { + TestGCE gce = new TestGCE(); + + // InUse Scan Refs should not be removed. + var scanCandidate = gce.addCandidate("/4/t0/F010.rf"); + var candOne = gce.addCandidate("/4/t0/F000.rf"); + var candTwo = gce.addCandidate("/6/t0/F123.rf"); + gce.addScanReference("4", null, "/t0/F010.rf"); + gce.addFileReference("4", null, "/t0/F000.rf"); + + GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm(); + gce.deleteInUseRefs = true; + + gca.collect(gce); + assertFileDeleted(gce, candTwo); + assertCandidateRemoved(gce, GcCandidateType.INUSE, candOne); + assertEquals(Set.of(scanCandidate), gce.candidates); + + gce.removeScanReference("4", null, "/t0/F010.rf"); + gca.collect(gce); + assertFileDeleted(gce, scanCandidate); + assertNoCandidatesRemoved(gce); + assertEquals(0, gce.candidates.size()); + } + // below are tests for potential failure conditions of the GC process. Some of these cases were // observed on clusters. Some were hypothesis based on observations. The result was that // candidate entries were not removed when they should have been and therefore files were diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java index 482bbd2f442..b9914bef1f8 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java @@ -703,7 +703,7 @@ private void deleteTablets(MergeInfo info) throws AccumuloException { Key key = entry.getKey(); if (key.compareColumnFamily(DataFileColumnFamily.NAME) == 0) { var stf = new StoredTabletFile(key.getColumnQualifierData().toString()); - datafilesAndDirs.add(new ReferenceFile(stf.getTableId(), stf.getMetaUpdateDelete())); + datafilesAndDirs.add(ReferenceFile.forFile(stf.getTableId(), stf.getMetaUpdateDelete())); if (datafilesAndDirs.size() > 1000) { ample.putGcFileAndDirCandidates(extent.tableId(), datafilesAndDirs); datafilesAndDirs.clear(); diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/bulkVer1/CleanUpBulkImport.java b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/bulkVer1/CleanUpBulkImport.java index 7cf276dc81d..1ed199a0702 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/bulkVer1/CleanUpBulkImport.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/bulkVer1/CleanUpBulkImport.java @@ -62,7 +62,7 @@ public Repo call(long tid, Manager manager) throws Exception { ample.removeBulkLoadInProgressFlag( "/" + bulkDir.getParent().getName() + "/" + bulkDir.getName()); ample.putGcFileAndDirCandidates(tableId, - Collections.singleton(new ReferenceFile(tableId, bulkDir.toString()))); + Collections.singleton(ReferenceFile.forFile(tableId, bulkDir.toString()))); log.debug("removing the metadata table markers for loaded files"); ample.removeBulkLoadEntries(tableId, tid, null, null); log.debug("releasing HDFS reservations for " + source + " and " + error); diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/bulkVer2/CleanUpBulkImport.java b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/bulkVer2/CleanUpBulkImport.java index f681055513d..12bbacff61c 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/bulkVer2/CleanUpBulkImport.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/bulkVer2/CleanUpBulkImport.java @@ -59,7 +59,7 @@ public Repo call(long tid, Manager manager) throws Exception { ample.removeBulkLoadInProgressFlag( "/" + bulkDir.getParent().getName() + "/" + bulkDir.getName()); ample.putGcFileAndDirCandidates(info.tableId, - Collections.singleton(new ReferenceFile(info.tableId, bulkDir.toString()))); + Collections.singleton(ReferenceFile.forFile(info.tableId, bulkDir.toString()))); if (info.tableState == TableState.ONLINE) { Text firstSplit = info.firstSplit == null ? null : new Text(info.firstSplit); diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/Upgrader9to10.java b/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/Upgrader9to10.java index 54c37751101..6bf9e3292b5 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/Upgrader9to10.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/Upgrader9to10.java @@ -601,7 +601,7 @@ static ReferenceFile switchToAllVolumes(Path olddelete) { var tableId = TableId.of(pathNoVolume.getParent().getName()); // except bulk directories don't get an all volume prefix if (pathNoVolume.getName().startsWith(Constants.BULK_PREFIX)) { - return new ReferenceFile(tableId, olddelete.toString()); + return ReferenceFile.forFile(tableId, olddelete.toString()); } else { return new AllVolumesDirectory(tableId, tabletDir); } @@ -610,7 +610,7 @@ static ReferenceFile switchToAllVolumes(Path olddelete) { if (pathNoVolume.depth() == 4) { Path tabletDirPath = pathNoVolume.getParent(); var tableId = TableId.of(tabletDirPath.getParent().getName()); - return new ReferenceFile(tableId, olddelete.toString()); + return ReferenceFile.forFile(tableId, olddelete.toString()); } else { throw new IllegalStateException("Invalid delete marker: " + olddelete); } diff --git a/server/manager/src/test/java/org/apache/accumulo/manager/upgrade/Upgrader9to10Test.java b/server/manager/src/test/java/org/apache/accumulo/manager/upgrade/Upgrader9to10Test.java index a1ad4415e11..290208b14ab 100644 --- a/server/manager/src/test/java/org/apache/accumulo/manager/upgrade/Upgrader9to10Test.java +++ b/server/manager/src/test/java/org/apache/accumulo/manager/upgrade/Upgrader9to10Test.java @@ -85,13 +85,13 @@ public void testSwitchRelativeDeletes() { resolved = Upgrader9to10.resolveRelativeDelete("/5a/" + BULK_PREFIX + "0005", VOL_PROP); assertEquals(new Path(VOL_PROP + "/tables/5a/" + BULK_PREFIX + "0005"), resolved); - ref1 = new ReferenceFile(tableId5a, VOL_PROP + "/tables/5a/" + BULK_PREFIX + "0005"); + ref1 = ReferenceFile.forFile(tableId5a, VOL_PROP + "/tables/5a/" + BULK_PREFIX + "0005"); var ref2 = Upgrader9to10.switchToAllVolumes(resolved); compareReferences(ref1, ref2); resolved = Upgrader9to10.resolveRelativeDelete("/5a/t-0005/F0009.rf", VOL_PROP); assertEquals(new Path(VOL_PROP + "/tables/5a/t-0005/F0009.rf"), resolved); - ref1 = new ReferenceFile(tableId5a, VOL_PROP + "/tables/5a/t-0005/F0009.rf"); + ref1 = ReferenceFile.forFile(tableId5a, VOL_PROP + "/tables/5a/t-0005/F0009.rf"); ref2 = Upgrader9to10.switchToAllVolumes(resolved); compareReferences(ref1, ref2); } @@ -123,14 +123,15 @@ public void testSwitchAllVolumes() { resolved = Upgrader9to10.resolveRelativeDelete( "hdfs://localhost:9000/accumulo/tables/5a/" + BULK_PREFIX + "0005", VOL_PROP); - ref1 = new ReferenceFile(tableId5a, + ref1 = ReferenceFile.forFile(tableId5a, "hdfs://localhost:9000/accumulo/tables/5a/" + BULK_PREFIX + "0005"); var ref2 = Upgrader9to10.switchToAllVolumes(resolved); compareReferences(ref1, ref2); resolved = Upgrader9to10.resolveRelativeDelete( "hdfs://localhost:9000/accumulo/tables/5a/t-0005/C0009.rf", VOL_PROP); - ref1 = new ReferenceFile(tableId5a, "hdfs://localhost:9000/accumulo/tables/5a/t-0005/C0009.rf"); + ref1 = ReferenceFile.forFile(tableId5a, + "hdfs://localhost:9000/accumulo/tables/5a/t-0005/C0009.rf"); ref2 = Upgrader9to10.switchToAllVolumes(resolved); compareReferences(ref1, ref2); } diff --git a/test/src/main/java/org/apache/accumulo/test/functional/GarbageCollectorIT.java b/test/src/main/java/org/apache/accumulo/test/functional/GarbageCollectorIT.java index eb1cafec3fd..c35238dd411 100644 --- a/test/src/main/java/org/apache/accumulo/test/functional/GarbageCollectorIT.java +++ b/test/src/main/java/org/apache/accumulo/test/functional/GarbageCollectorIT.java @@ -452,7 +452,7 @@ private void addEntries(AccumuloClient client) throws Exception { String longpath = "aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeee" + "ffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjj"; var path = String.format("file:/%020d/%s", i, longpath); - Mutation delFlag = ample.createDeleteMutation(new ReferenceFile(TableId.of("1"), path)); + Mutation delFlag = ample.createDeleteMutation(ReferenceFile.forFile(TableId.of("1"), path)); bw.addMutation(delFlag); } }