Skip to content

Commit

Permalink
Resolve Boogie test deadlocks (#932)
Browse files Browse the repository at this point in the history
### Changes
- Made a change to the Dispose method of
`CustomStackSizePoolTaskScheduler.cs`, so it now interrupts all its
thread instead of joining them. I think this was causing the entire
Boogie process to hang after it was finished, causing tests to deadlock.
- Add `blame-hang-timeout` to the `dotnet test` invocation that runs the
NUnit tests, which allows identifying which test fails when the unit
tests timeout.
- Add an undocumented option `/processTimeLimit` that can be used by
tests to let Boogie stop itself after a certain timespan, allowing
developers to get a stacktrace of where Boogie was at the point of
timeout.
- Remove APIs for cancelling requests, which were only available
programmatically and not used by Dafny.
- Fix a bug that could cause a concurrency related exception in
`QuantifierInstantiationInfo`, which was introduced by
#862.
- Use an invariant culture somewhere in printing debug output, so the
Boogie tests locally pass on my machine, despite my European culture.
- Some renaming of variations of VerifyImplementation, to make the
differences more clear.
- Configure a 30 seconds timelimit per VC for all Boogie tests by
default. Added an exception for a few slow tests
- Let Boogie cancel checking of a VC after its timelimit has passed,
regardless of what the solver is doing

### Testing
- Did not add any additional tests
- Let the test-suite succeed without retries 4 times, in an attempt to
determine that it resolves the test instabilities we've had.
  • Loading branch information
keyboardDrummer authored Aug 13, 2024
1 parent a3cc4ba commit 004e283
Show file tree
Hide file tree
Showing 17 changed files with 128 additions and 219 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ jobs:
# Create packages
dotnet pack --no-build -c ${{ matrix.configuration }} ${SOLUTION}
# Run unit tests
dotnet test --no-build -c ${{ matrix.configuration }} ${SOLUTION}
dotnet test --blame-hang-timeout 120s --no-build -c ${{ matrix.configuration }} ${SOLUTION}
# Run lit test suite
lit --param ${{ matrix.lit_param }} -v --timeout=120 -D configuration=${{ matrix.configuration }} Test
- name: Deploy to nuget
Expand Down
12 changes: 10 additions & 2 deletions Source/BoogieDriver/BoogieDriver.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.Contracts;
using System.IO;
using System.Threading;
using System.Threading.Tasks;

namespace Microsoft.Boogie
{
Expand All @@ -20,6 +23,12 @@ public static int Main(string[] args)
{
return 1;
}
var source = new CancellationTokenSource();
if (options.ProcessTimeLimit != 0)
{
var span = TimeSpan.FromSeconds(options.ProcessTimeLimit);
source.CancelAfter(span);
}
using var executionEngine = ExecutionEngine.CreateWithoutSharedCache(options);

if (options.ProcessInfoFlags())
Expand Down Expand Up @@ -63,8 +72,7 @@ public static int Main(string[] args)

Helpers.ExtraTraceInformation(options, "Becoming sentient");

var success = executionEngine.ProcessFiles(Console.Out, fileList).Result;

var success = executionEngine.ProcessFiles(Console.Out, fileList, cancellationToken: source.Token).Result;
if (options.XmlSink != null)
{
options.XmlSink.Close();
Expand Down
2 changes: 1 addition & 1 deletion Source/Core/AsyncQueue.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public IEnumerable<T> Items
}
}

public void Clear() {
public void CancelWaitsAndClear() {
while (customers.TryDequeue(out var customer)) {
customer.TrySetCanceled();
}
Expand Down
8 changes: 8 additions & 0 deletions Source/ExecutionEngine/CommandLineOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,10 @@ public int BracketIdsInVC
}
}

/// <summary>
/// A hidden option that configures a time limit for the whole Boogie CLI invocation
/// </summary>
public uint ProcessTimeLimit { get; set; } = 0;
public uint TimeLimit { get; set; } = 0; // 0 means no limit
public uint ResourceLimit { get; set; } = 0; // default to 0
public uint SmokeTimeout { get; set; } = 10; // default to 10s
Expand Down Expand Up @@ -1273,6 +1277,10 @@ protected override bool ParseOption(string name, CommandLineParseState ps)
case "timeLimit":
ps.GetUnsignedNumericArgument(x => TimeLimit = x, null);
return true;

case "processTimeLimit":
ps.GetUnsignedNumericArgument(x => ProcessTimeLimit = x, null);
return true;

case "rlimit":
ps.GetUnsignedNumericArgument(x => ResourceLimit = x);
Expand Down
21 changes: 15 additions & 6 deletions Source/ExecutionEngine/CustomStackSizePoolTaskScheduler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,22 @@ private void WorkLoop()

private void RunItem()
{
try {
try
{
var task = queue.Dequeue().Result;
TryExecuteTask(task);
} catch(Exception e) {
if (e.GetBaseException() is OperationCanceledException) {
}
catch (ThreadInterruptedException)
{
}
catch (Exception e)
{
if (e.GetBaseException() is OperationCanceledException)
{
// Async queue cancels tasks when it is disposed, which happens when this scheduler is disposed
} else {
}
else
{
throw;
}
}
Expand All @@ -93,10 +102,10 @@ private void RunItem()
public void Dispose()
{
disposeTokenSource.Cancel();
queue.Clear();
queue.CancelWaitsAndClear();
foreach (var thread in threads)
{
thread.Join();
thread.Interrupt();
}
}
}
99 changes: 41 additions & 58 deletions Source/ExecutionEngine/ExecutionEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -74,29 +74,24 @@ public static ExecutionEngine CreateWithoutSharedCache(ExecutionEngineOptions op

static DateTime FirstRequestStart;

static readonly ConcurrentDictionary<string, TimeSpan>
TimePerRequest = new ConcurrentDictionary<string, TimeSpan>();
static readonly ConcurrentDictionary<string, TimeSpan> TimePerRequest = new();

static readonly ConcurrentDictionary<string, PipelineStatistics> StatisticsPerRequest =
new ConcurrentDictionary<string, PipelineStatistics>();
static readonly ConcurrentDictionary<string, PipelineStatistics> StatisticsPerRequest = new();

static readonly ConcurrentDictionary<string, CancellationTokenSource> ImplIdToCancellationTokenSource =
new ConcurrentDictionary<string, CancellationTokenSource>();

static readonly ConcurrentDictionary<string, CancellationTokenSource> RequestIdToCancellationTokenSource =
new ConcurrentDictionary<string, CancellationTokenSource>();
static readonly ConcurrentDictionary<string, CancellationTokenSource> ImplIdToCancellationTokenSource = new();

private readonly TaskScheduler largeThreadScheduler;
private readonly bool disposeScheduler;

public async Task<bool> ProcessFiles(TextWriter output, IList<string> fileNames, bool lookForSnapshots = true,
string programId = null) {
string programId = null, CancellationToken cancellationToken = default) {
Contract.Requires(cce.NonNullElements(fileNames));

if (Options.VerifySeparately && 1 < fileNames.Count) {
var success = true;
foreach (var fileName in fileNames) {
success &= await ProcessFiles(output, new List<string> { fileName }, lookForSnapshots, fileName);
cancellationToken.ThrowIfCancellationRequested();
success &= await ProcessFiles(output, new List<string> { fileName }, lookForSnapshots, fileName, cancellationToken: cancellationToken);
}
return success;
}
Expand All @@ -105,22 +100,30 @@ public async Task<bool> ProcessFiles(TextWriter output, IList<string> fileNames,
var snapshotsByVersion = LookForSnapshots(fileNames);
var success = true;
foreach (var snapshots in snapshotsByVersion) {
cancellationToken.ThrowIfCancellationRequested();
// BUG: Reusing checkers during snapshots doesn't work, even though it should. We create a new engine (and thus checker pool) to workaround this.
using var engine = new ExecutionEngine(Options, Cache,
CustomStackSizePoolTaskScheduler.Create(StackSize, Options.VcsCores), true);
success &= await engine.ProcessFiles(output, new List<string>(snapshots), false, programId);
success &= await engine.ProcessFiles(output, new List<string>(snapshots), false, programId, cancellationToken: cancellationToken);
}
return success;
}

return await ProcessProgramFiles(output, fileNames, programId, cancellationToken);
}

private Task<bool> ProcessProgramFiles(TextWriter output, IList<string> fileNames, string programId,
CancellationToken cancellationToken)
{
using XmlFileScope xf = new XmlFileScope(Options.XmlSink, fileNames[^1]);
Program program = ParseBoogieProgram(fileNames, false);
var bplFileName = fileNames[^1];
if (program == null) {
return true;
if (program == null)
{
return Task.FromResult(true);
}

return await ProcessProgram(output, program, bplFileName, programId);
return ProcessProgram(output, program, bplFileName, programId, cancellationToken: cancellationToken);
}

[Obsolete("Please inline this method call")]
Expand All @@ -129,7 +132,7 @@ public bool ProcessProgram(Program program, string bplFileName,
return ProcessProgram(Options.OutputWriter, program, bplFileName, programId).Result;
}

public async Task<bool> ProcessProgram(TextWriter output, Program program, string bplFileName, string programId = null)
public async Task<bool> ProcessProgram(TextWriter output, Program program, string bplFileName, string programId = null, CancellationToken cancellationToken = default)
{
if (programId == null)
{
Expand All @@ -147,7 +150,7 @@ public async Task<bool> ProcessProgram(TextWriter output, Program program, strin

if (Options.PrintCFGPrefix != null) {
foreach (var impl in program.Implementations) {
using StreamWriter sw = new StreamWriter(Options.PrintCFGPrefix + "." + impl.Name + ".dot");
await using StreamWriter sw = new StreamWriter(Options.PrintCFGPrefix + "." + impl.Name + ".dot");
await sw.WriteAsync(program.ProcessLoops(Options, impl).ToDot());
}
}
Expand All @@ -168,7 +171,7 @@ public async Task<bool> ProcessProgram(TextWriter output, Program program, strin
Inline(program);

var stats = new PipelineStatistics();
outcome = await InferAndVerify(output, program, stats, 1 < Options.VerifySnapshots ? programId : null);
outcome = await InferAndVerify(output, program, stats, 1 < Options.VerifySnapshots ? programId : null, cancellationToken: cancellationToken);
switch (outcome) {
case PipelineOutcome.Done:
case PipelineOutcome.VerificationCompleted:
Expand Down Expand Up @@ -530,7 +533,8 @@ public async Task<PipelineOutcome> InferAndVerify(
Program program,
PipelineStatistics stats,
string programId = null,
ErrorReporterDelegate er = null, string requestId = null)
ErrorReporterDelegate er = null, string requestId = null,
CancellationToken cancellationToken = default)
{
Contract.Requires(program != null);
Contract.Requires(stats != null);
Expand All @@ -542,7 +546,7 @@ public async Task<PipelineOutcome> InferAndVerify(

var start = DateTime.UtcNow;

var processedProgram = await PreProcessProgramVerification(program);
var processedProgram = await PreProcessProgramVerification(program, cancellationToken);

foreach (var action in Options.UseResolvedProgram) {
action(Options, processedProgram);
Expand All @@ -555,7 +559,7 @@ public async Task<PipelineOutcome> InferAndVerify(

if (Options.ContractInfer)
{
return await RunHoudini(program, stats, er);
return await RunHoudini(program, stats, er, cancellationToken);
}
var stablePrioritizedImpls = GetPrioritizedImplementations(program);

Expand All @@ -565,7 +569,8 @@ public async Task<PipelineOutcome> InferAndVerify(
out stats.CachingActionCounts);
}

var outcome = await VerifyEachImplementation(output, processedProgram, stats, programId, er, requestId, stablePrioritizedImpls);
var outcome = await VerifyEachImplementation(output, processedProgram, stats,
programId, er, stablePrioritizedImpls, cancellationToken);

if (1 < Options.VerifySnapshots && programId != null)
{
Expand All @@ -578,7 +583,7 @@ public async Task<PipelineOutcome> InferAndVerify(
return outcome;
}

private Task<ProcessedProgram> PreProcessProgramVerification(Program program)
private Task<ProcessedProgram> PreProcessProgramVerification(Program program, CancellationToken cancellationToken)
{
return LargeThreadTaskFactory.StartNew(() =>
{
Expand Down Expand Up @@ -613,7 +618,7 @@ private Task<ProcessedProgram> PreProcessProgramVerification(Program program)

program.DeclarationDependencies = Pruner.ComputeDeclarationDependencies(Options, program);
return processedProgram;
});
}, cancellationToken);
}

private ProcessedProgram ExtractLoops(Program program)
Expand Down Expand Up @@ -652,18 +657,16 @@ private Implementation[] GetPrioritizedImplementations(Program program)

private async Task<PipelineOutcome> VerifyEachImplementation(TextWriter outputWriter, ProcessedProgram processedProgram,
PipelineStatistics stats,
string programId, ErrorReporterDelegate er, string requestId, Implementation[] stablePrioritizedImpls)
string programId, ErrorReporterDelegate er, Implementation[] stablePrioritizedImpls,
CancellationToken cancellationToken)
{
var consoleCollector = new ConcurrentToSequentialWriteManager(outputWriter);

var cts = new CancellationTokenSource();
RequestIdToCancellationTokenSource.AddOrUpdate(requestId, cts, (k, ov) => cts);

var tasks = stablePrioritizedImpls.Select(async (impl, index) => {
await using var taskWriter = consoleCollector.AppendWriter();
var implementation = stablePrioritizedImpls[index];
var result = await EnqueueVerifyImplementation(processedProgram, stats, programId, er,
implementation, cts, taskWriter);
implementation, cancellationToken, taskWriter);
var output = result.GetOutput(Options.Printer, this, stats, er);
await taskWriter.WriteAsync(output);
return result;
Expand All @@ -681,9 +684,6 @@ private async Task<PipelineOutcome> VerifyEachImplementation(TextWriter outputWr
Options.Printer.ErrorWriteLine(outputWriter, "Fatal Error: ProverException: {0}", e.Message);
outcome = PipelineOutcome.FatalError;
}
finally {
CleanupRequest(requestId);
}

if (Options.Trace && Options.TrackVerificationCoverage && processedProgram.Program.AllCoveredElements.Any()) {
Options.OutputWriter.WriteLine("Proof dependencies of whole program:\n {0}",
Expand Down Expand Up @@ -711,7 +711,7 @@ public void Error(IToken tok, string msg)
}
}

public async Task<IReadOnlyList<IVerificationTask>> GetVerificationTasks(Program program)
public async Task<IReadOnlyList<IVerificationTask>> GetVerificationTasks(Program program, CancellationToken cancellationToken = default)
{
var sink = new CollectingErrorSink();
var resolutionErrors = program.Resolve(Options, sink);
Expand All @@ -731,7 +731,7 @@ public async Task<IReadOnlyList<IVerificationTask>> GetVerificationTasks(Program
CoalesceBlocks(program);
Inline(program);

var processedProgram = await PreProcessProgramVerification(program);
var processedProgram = await PreProcessProgramVerification(program, cancellationToken);
return GetPrioritizedImplementations(program).SelectMany(implementation =>
{
var writer = TextWriter.Null;
Expand Down Expand Up @@ -793,7 +793,7 @@ private async Task<ImplementationRunResult> EnqueueVerifyImplementation(
await verifyImplementationSemaphore.WaitAsync(cancellationToken);
try
{
return await VerifyImplementation(processedProgram, stats, errorReporterDelegate,
return await VerifyImplementationWithCaching(processedProgram, stats, errorReporterDelegate,
cancellationToken, implementation, programId, taskWriter);
}
finally
Expand Down Expand Up @@ -843,25 +843,7 @@ private void TraceCachingForBenchmarking(PipelineStatistics stats,
}
}

public static void CancelRequest(string requestId)
{
Contract.Requires(requestId != null);

if (RequestIdToCancellationTokenSource.TryGetValue(requestId, out var cts))
{
cts.Cancel();
}
}

private static void CleanupRequest(string requestId)
{
if (requestId != null)
{
RequestIdToCancellationTokenSource.TryRemove(requestId, out var old);
}
}

private async Task<ImplementationRunResult> VerifyImplementation(
private async Task<ImplementationRunResult> VerifyImplementationWithCaching(
ProcessedProgram processedProgram,
PipelineStatistics stats,
ErrorReporterDelegate er,
Expand All @@ -878,7 +860,7 @@ private async Task<ImplementationRunResult> VerifyImplementation(
Options.Printer.Inform("", traceWriter); // newline
Options.Printer.Inform($"Verifying {implementation.VerboseName} ...", traceWriter);

var result = await VerifyImplementationWithoutCaching(processedProgram, stats, er, cancellationToken,
var result = await VerifyImplementationWithLargeThread(processedProgram, stats, er, cancellationToken,
programId, implementation, traceWriter);
if (0 < Options.VerifySnapshots && !string.IsNullOrEmpty(implementation.Checksum))
{
Expand Down Expand Up @@ -911,7 +893,7 @@ public ImplementationRunResult GetCachedVerificationResult(Implementation impl,
return null;
}

private Task<ImplementationRunResult> VerifyImplementationWithoutCaching(ProcessedProgram processedProgram,
private Task<ImplementationRunResult> VerifyImplementationWithLargeThread(ProcessedProgram processedProgram,
PipelineStatistics stats, ErrorReporterDelegate er, CancellationToken cancellationToken,
string programId, Implementation impl, TextWriter traceWriter)
{
Expand All @@ -927,7 +909,7 @@ private Task<ImplementationRunResult> VerifyImplementationWithoutCaching(Process
try
{
(verificationResult.VcOutcome, verificationResult.Errors, verificationResult.RunResults) =
await vcGen.VerifyImplementation2(new ImplementationRun(impl, traceWriter), cancellationToken);
await vcGen.VerifyImplementationDirectly(new ImplementationRun(impl, traceWriter), cancellationToken);
processedProgram.PostProcessResult(vcGen, impl, verificationResult);
}
catch (VCGenException e)
Expand Down Expand Up @@ -984,7 +966,8 @@ private Task<ImplementationRunResult> VerifyImplementationWithoutCaching(Process

#region Houdini

private async Task<PipelineOutcome> RunHoudini(Program program, PipelineStatistics stats, ErrorReporterDelegate er)
private async Task<PipelineOutcome> RunHoudini(Program program, PipelineStatistics stats, ErrorReporterDelegate er,
CancellationToken cancellationToken)
{
Contract.Requires(stats != null);

Expand Down
Loading

0 comments on commit 004e283

Please sign in to comment.