From 578a3d22920c2dc319c3c55ff0b63e899aa15588 Mon Sep 17 00:00:00 2001 From: Rick van Schijndel Date: Wed, 31 Jul 2024 01:19:40 +0200 Subject: [PATCH 1/3] t: increase timeouts for slow commands with high load We've seen many fails on ofborg, at lot of them ultimately appear to come down to a timeout being hit, resulting in something like this: Failure executing slapadd -F //slap.d -b dc=example -l //load.ldif. Hopefully this resolves it for most cases. I've done some endurance testing and this helps a lot. some other commands also regularly time-out with high load: - hydra-init - hydra-create-user - nix-store --delete This should address most issues with tests randomly failing. Used the following script for endurance testing: ``` import os import subprocess run_counter = 0 fail_counter = 0 while True: try: run_counter += 1 print(f"Starting run {run_counter}") env = os.environ env["YATH_JOB_COUNT"] = "20" result = subprocess.run(["perl", "t/test.pl"], env=env) if (result.returncode != 0): fail_counter += 1 print(f"Finish run {run_counter}, total fail count: {fail_counter}") except KeyboardInterrupt: print(f"Finished {run_counter} runs with {fail_counter} fails") break ``` In case someone else wants to do it on their system :). Note that YATH_JOB_COUNT may need to be changed loosely based on your cores. I only have 4 cores (8 threads), so for others higher numbers might yield better results in hashing out unstable tests. --- t/lib/HydraTestContext.pm | 2 +- t/lib/LDAPContext.pm | 6 +++--- .../build-locally-with-substitutable-path.t | 2 +- t/scripts/hydra-create-user.t | 14 +++++++------- t/scripts/hydra-init.t | 4 ++-- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/t/lib/HydraTestContext.pm b/t/lib/HydraTestContext.pm index e1a5b2261..1d6fa9091 100644 --- a/t/lib/HydraTestContext.pm +++ b/t/lib/HydraTestContext.pm @@ -92,7 +92,7 @@ sub new { $opts{'before_init'}->($self); } - expectOkay(5, ("hydra-init")); + expectOkay(30, ("hydra-init")); return $self; } diff --git a/t/lib/LDAPContext.pm b/t/lib/LDAPContext.pm index 2cd1a19dc..df1334f0e 100644 --- a/t/lib/LDAPContext.pm +++ b/t/lib/LDAPContext.pm @@ -70,7 +70,7 @@ sub add_user { my $email = $opts{'email'} // "$name\@example"; my $password = $opts{'password'} // rand_chars(); - my ($res, $stdout, $stderr) = captureStdoutStderr(1, ("slappasswd", "-s", $password)); + my ($res, $stdout, $stderr) = captureStdoutStderr(5, ("slappasswd", "-s", $password)); if ($res) { die "Failed to execute slappasswd ($res): $stderr, $stdout"; } @@ -178,7 +178,7 @@ sub start { sub validateConfig { my ($self) = @_; - expectOkay(1, ("slaptest", "-u", "-F", $self->{"_slapd_dir"})); + expectOkay(5, ("slaptest", "-u", "-F", $self->{"_slapd_dir"})); } sub _spawn { @@ -218,7 +218,7 @@ sub load_ldif { my $path = "${\$self->{'_tmpdir'}}/load.ldif"; write_file($path, $content); - expectOkay(1, ("slapadd", "-F", $self->{"_slapd_dir"}, "-b", $suffix, "-l", $path)); + expectOkay(5, ("slapadd", "-F", $self->{"_slapd_dir"}, "-b", $suffix, "-l", $path)); $self->validateConfig(); } diff --git a/t/queue-runner/build-locally-with-substitutable-path.t b/t/queue-runner/build-locally-with-substitutable-path.t index e3b31761b..6477635a3 100644 --- a/t/queue-runner/build-locally-with-substitutable-path.t +++ b/t/queue-runner/build-locally-with-substitutable-path.t @@ -39,7 +39,7 @@ subtest "Building, caching, and then garbage collecting the underlying job" => s ok(unlink(Hydra::Helper::Nix::gcRootFor($path)), "Unlinking the GC root for underlying Dependency succeeds"); - (my $ret, my $stdout, my $stderr) = captureStdoutStderr(5, "nix-store", "--delete", $path); + (my $ret, my $stdout, my $stderr) = captureStdoutStderr(15, "nix-store", "--delete", $path); is($ret, 0, "Deleting the underlying dependency should succeed"); }; diff --git a/t/scripts/hydra-create-user.t b/t/scripts/hydra-create-user.t index 71a5eda30..7f943f9d3 100644 --- a/t/scripts/hydra-create-user.t +++ b/t/scripts/hydra-create-user.t @@ -9,7 +9,7 @@ my $db = $ctx->db(); subtest "Handling password and password hash creation" => sub { subtest "Creating a user with a plain text password (insecure) stores the password securely" => sub { - my ($res, $stdout, $stderr) = captureStdoutStderr(5, ("hydra-create-user", "plain-text-user", "--password", "foobar")); + my ($res, $stdout, $stderr) = captureStdoutStderr(15, ("hydra-create-user", "plain-text-user", "--password", "foobar")); is($res, 0, "hydra-create-user should exit zero"); like($stderr, qr/Submitting plaintext passwords as arguments is deprecated and will be removed/, "Submitting a plain text password is deprecated."); @@ -23,7 +23,7 @@ subtest "Handling password and password hash creation" => sub { }; subtest "Creating a user with a sha1 password (still insecure) stores the password as a hashed sha1" => sub { - my ($res, $stdout, $stderr) = captureStdoutStderr(5, ("hydra-create-user", "old-password-hash-user", "--password-hash", "8843d7f92416211de9ebb963ff4ce28125932878")); + my ($res, $stdout, $stderr) = captureStdoutStderr(15, ("hydra-create-user", "old-password-hash-user", "--password-hash", "8843d7f92416211de9ebb963ff4ce28125932878")); is($res, 0, "hydra-create-user should exit zero"); my $user = $db->resultset('Users')->find({ username => "old-password-hash-user" }); @@ -36,7 +36,7 @@ subtest "Handling password and password hash creation" => sub { }; subtest "Creating a user with an argon2 password stores the password as given" => sub { - my ($res, $stdout, $stderr) = captureStdoutStderr(5, ("hydra-create-user", "argon2-hash-user", "--password-hash", '$argon2id$v=19$m=262144,t=3,p=1$tMnV5paYjmIrUIb6hylaNA$M8/e0i3NGrjhOliVLa5LqQ')); + my ($res, $stdout, $stderr) = captureStdoutStderr(15, ("hydra-create-user", "argon2-hash-user", "--password-hash", '$argon2id$v=19$m=262144,t=3,p=1$tMnV5paYjmIrUIb6hylaNA$M8/e0i3NGrjhOliVLa5LqQ')); is($res, 0, "hydra-create-user should exit zero"); my $user = $db->resultset('Users')->find({ username => "argon2-hash-user" }); @@ -50,7 +50,7 @@ subtest "Handling password and password hash creation" => sub { subtest "Creating a user by prompting for the password" => sub { subtest "with the same password twice" => sub { - my ($res, $stdout, $stderr) = captureStdoutStderrWithStdin(5, ["hydra-create-user", "prompted-pass-user", "--password-prompt"], "my-password\nmy-password\n"); + my ($res, $stdout, $stderr) = captureStdoutStderrWithStdin(15, ["hydra-create-user", "prompted-pass-user", "--password-prompt"], "my-password\nmy-password\n"); is($res, 0, "hydra-create-user should exit zero"); my $user = $db->resultset('Users')->find({ username => "prompted-pass-user" }); @@ -62,7 +62,7 @@ subtest "Handling password and password hash creation" => sub { }; subtest "With mismatched password confirmation" => sub { - my ($res, $stdout, $stderr) = captureStdoutStderrWithStdin(5, ["hydra-create-user", "prompted-pass-user", "--password-prompt"], "my-password\nnot-my-password\n"); + my ($res, $stdout, $stderr) = captureStdoutStderrWithStdin(15, ["hydra-create-user", "prompted-pass-user", "--password-prompt"], "my-password\nnot-my-password\n"); isnt($res, 0, "hydra-create-user should exit non-zero"); }; }; @@ -76,7 +76,7 @@ subtest "Handling password and password hash creation" => sub { ); for my $case (@cases) { - my ($res, $stdout, $stderr) = captureStdoutStderr(5, ( + my ($res, $stdout, $stderr) = captureStdoutStderr(15, ( "hydra-create-user", "bogus-password-options", @{$case})); like($stderr, qr/please specify only one of --password-prompt or --password-hash/, "We get an error about specifying the password"); isnt($res, 0, "hydra-create-user should exit non-zero with conflicting " . join(" ", @{$case})); @@ -84,7 +84,7 @@ subtest "Handling password and password hash creation" => sub { }; subtest "A password is not required for creating a Google-based account" => sub { - my ($res, $stdout, $stderr) = captureStdoutStderr(5, ( + my ($res, $stdout, $stderr) = captureStdoutStderr(15, ( "hydra-create-user", "google-account", "--type", "google")); is($res, 0, "hydra-create-user should exit zero"); }; diff --git a/t/scripts/hydra-init.t b/t/scripts/hydra-init.t index bd5bd4bf8..603aa4a4a 100644 --- a/t/scripts/hydra-init.t +++ b/t/scripts/hydra-init.t @@ -28,7 +28,7 @@ subtest "hydra-init upgrades user's password hashes from sha1 to sha1 inside Arg $janet->setPassword("foobar"); is($alice->password, "8843d7f92416211de9ebb963ff4ce28125932878", "Alices's sha1 is stored in the database"); - my ($res, $stdout, $stderr) = captureStdoutStderr(5, ("hydra-init")); + my ($res, $stdout, $stderr) = captureStdoutStderr(30, ("hydra-init")); if ($res != 0) { is($stdout, ""); is($stderr, ""); @@ -55,7 +55,7 @@ subtest "hydra-init upgrades user's password hashes from sha1 to sha1 inside Arg }; subtest "Running hydra-init don't break Alice or Janet's passwords" => sub { - my ($res, $stdout, $stderr) = captureStdoutStderr(5, ("hydra-init")); + my ($res, $stdout, $stderr) = captureStdoutStderr(30, ("hydra-init")); is($res, 0, "hydra-init should exit zero"); my $updatedAlice = $db->resultset('Users')->find({ username => "alice" }); From a6b14369ee05c376deb04dd71062a5b95f186096 Mon Sep 17 00:00:00 2001 From: Rick van Schijndel Date: Wed, 31 Jul 2024 17:10:44 +0200 Subject: [PATCH 2/3] t/test.pl: increase event-timeout, set qvf Only log issues/failures when something's actually up. It has irked me for a long time that so much output came out of running the tests, this seems to silence it. It does hide some warnings, but I think it makes the output so much more readable that it's worth the tradeoff. Helps for highly parallel running of jobs, sometimes they'd not give output for a while. Setting this timeout higher appears to help. Not completely sure if this is the right place to do it, but it works fine for me. --- t/test.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/test.pl b/t/test.pl index ba7f37811..122846373 100644 --- a/t/test.pl +++ b/t/test.pl @@ -21,7 +21,7 @@ BEGIN print STDERR "test.pl: Defaulting \$YATH_JOB_COUNT to \$NIX_BUILD_CORES (${\$ENV{'NIX_BUILD_CORES'}})\n"; } -system($^X, find_yath(), '-D', 'test', '--default-search' => './', @ARGV); +system($^X, find_yath(), '-D', 'test', '--qvf', '--event-timeout', 240, '--default-search' => './', @ARGV); my $exit = $?; # This makes sure it works with prove. From 54002f0fcf4a7cb65baf3e25e665e5325292f609 Mon Sep 17 00:00:00 2001 From: Rick van Schijndel Date: Wed, 31 Jul 2024 17:12:47 +0200 Subject: [PATCH 3/3] t/evaluator/evaluate-oom-job.t: always skip, the test always fails We should look into how to resolve this, but I tried some things and nothing really worked. Let's put it skipped for now until someone comes along to improve it. --- t/evaluator/evaluate-oom-job.t | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/t/evaluator/evaluate-oom-job.t b/t/evaluator/evaluate-oom-job.t index 6c17d4e47..8f0450c50 100644 --- a/t/evaluator/evaluate-oom-job.t +++ b/t/evaluator/evaluate-oom-job.t @@ -31,6 +31,10 @@ if ($sd_res != 0) { skip_all("`systemd-run` returned non-zero when executing `true` (expected 0)"); } +# XXX(Mindavi): We should think about how to fix this. +# Note that it was always skipped on ofborg/h.n.o (nixos hydra) since systemd-run is not present in the ambient environment there. +skip_all("Always fails, an error about 'oom' being a string is logged and the process never OOMs. Needs a way to use more memory."); + my $ctx = test_context(); # Contain the memory usage to 25 MegaBytes using `systemd-run`