Skip to content

Commit

Permalink
Implemented PR suggestions, removed race condition
Browse files (browse the repository at this point in the history)
  • Loading branch information
dlmarion committed Dec 16, 2024
1 parent fbf0b87 commit 77492bf
Showing 1 changed file with 22 additions and 17 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -39,6 +39,7 @@
import org.apache.accumulo.core.client.Durability;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.dataImpl.KeyExtent;
import org.apache.accumulo.core.fate.zookeeper.ServiceLock;
import org.apache.accumulo.core.protobuf.ProtobufUtil;
import org.apache.accumulo.core.util.Halt;
import org.apache.accumulo.core.util.Retry;
Expand Down Expand Up @@ -388,6 +389,7 @@ private void write(final Collection<CommitSession> sessions, boolean mincFinish,

boolean success = false;
while (!success) {
boolean sawWriteFailure = false;
try {
// get a reference to the loggers that no other thread can touch
AtomicInteger currentId = new AtomicInteger(-1);
Expand Down Expand Up @@ -442,7 +444,7 @@ private void write(final Collection<CommitSession> sessions, boolean mincFinish,
writeRetry.logRetry(log, "Logs closed while writing", ex);
} catch (Exception t) {
writeRetry.logRetry(log, "Failed to write to WAL", t);

sawWriteFailure = true;
try {
// Backoff
writeRetry.waitForNextAttempt(log, "write to WAL");
Expand All @@ -458,24 +460,27 @@ private void write(final Collection<CommitSession> sessions, boolean mincFinish,
// the logs haven't changed.
final int finalCurrent = currentLogId;
if (!success) {
final ServiceLock tabletServerLock = tserver.getLock();
if (sawWriteFailure && tabletServerLock != null) {
log.info("WAL write failure, validating server lock in ZooKeeper");
if (!tabletServerLock.verifyLockAtSource()) {
// try to close the log, then Halt the VM
testLockAndRun(logIdLock, new TestCallWithWriteLock() {

@Override
boolean test() {
return true;
}

if (tserver.getLock() == null || !tserver.getLock().verifyLockAtSource()) {
// try to close the log, then Halt the VM
testLockAndRun(logIdLock, new TestCallWithWriteLock() {

@Override
boolean test() {
return true;
}

@Override
void withWriteLock() throws IOException {
close();
}
});
@Override
void withWriteLock() throws IOException {
close();
}
});

log.error("Writing to WAL has failed and TabletServer lock does not exist. Halting...");
Halt.halt("TabletServer lock does not exist", -1);
log.error("Writing to WAL has failed and TabletServer lock does not exist. Halting...");
Halt.halt("TabletServer lock does not exist", -1);
}
}

testLockAndRun(logIdLock, new TestCallWithWriteLock() {
Expand Down

0 comments on commit 77492bf

Please sign in to comment.