Don't reset node during test (#2012)
# Description
Ever since we replaced `hardhat` with `anvil` we have been seeing flaky tests.
The issue seems to be related to deadlocks inside `anvil`, possibly caused by
running the same node instance for all tests and resetting it over and over
again.

# Changes
Drop `anvil` from the `docker-compose` file and instead spawn a fresh
`anvil` instance for every `e2e` test.
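To illustrate the new lifecycle: instead of resetting a shared node, each test now spawns and tears down its own `anvil` process via the `Node` type added in `crates/e2e/src/nodes/mod.rs`. A minimal sketch, assuming a module path and test body chosen for illustration (real tests go through the `run_test` helpers):

```rust
// Sketch of the per-test node lifecycle introduced by this PR; the test body and
// module path are illustrative assumptions, not the exact harness code.
#[tokio::test]
#[ignore]
async fn spawns_a_fresh_node_per_test() {
    // Spawn a dedicated `anvil` instance instead of resetting a shared one.
    let mut node = e2e::nodes::Node::new().await;

    // ... run the actual assertions against http://127.0.0.1:8545 ...

    // Prefer the explicit async kill; `Drop` can only fire-and-forget a SIGKILL.
    node.kill().await;
}
```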

## How to test
Rerun CI a couple of times and 🙏 that we don't see flaky errors?
MartinquaXD authored Oct 25, 2023
1 parent a7f2f65 commit 5328d08
Showing 7 changed files with 153 additions and 103 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/pull-request.yaml
@@ -66,14 +66,15 @@ jobs:
CARGO_PROFILE_TEST_DEBUG: 0
steps:
- uses: actions/checkout@v3
- uses: foundry-rs/foundry-toolchain@v1
- uses: Swatinem/rust-cache@v2
# Start the build process in the background. The following cargo test command will automatically
# wait for the build process to be done before proceeding.
- run: cargo build -p e2e --tests &
- uses: yu-ichiro/spin-up-docker-compose-action@v1
with:
file: docker-compose.yaml
up-opts: -d db migrations chain
up-opts: -d db migrations
- run: cargo test -p e2e local_node -- --ignored --test-threads 1 --nocapture

test-driver:
40 changes: 1 addition & 39 deletions crates/e2e/src/nodes/forked_node.rs
@@ -1,49 +1,11 @@
use {
super::TestNode,
ethcontract::H160,
reqwest::{IntoUrl, Url},
reqwest::Url,
serde_json::json,
std::fmt::Debug,
web3::{api::Namespace, helpers::CallFuture, Transport},
};

pub struct Forker<T> {
forked_node_api: ForkedNodeApi<T>,
fork_url: Url,
}

impl<T: Transport> Forker<T> {
pub async fn new(web3: &web3::Web3<T>, solver_address: H160, fork_url: impl IntoUrl) -> Self {
let fork_url = fork_url.into_url().expect("Invalid fork URL");

let forked_node_api = web3.api::<ForkedNodeApi<_>>();
forked_node_api
.fork(&fork_url)
.await
.expect("Test network must support anvil_reset");

forked_node_api
.impersonate(&solver_address)
.await
.expect("Test network must support anvil_impersonateAccount");

Self {
forked_node_api,
fork_url,
}
}
}

#[async_trait::async_trait(?Send)]
impl<T: Transport> TestNode for Forker<T> {
async fn reset(&self) {
self.forked_node_api
.fork(&self.fork_url)
.await
.expect("Test network must support anvil_reset");
}
}

#[derive(Debug, Clone)]
pub struct ForkedNodeApi<T> {
transport: T,
30 changes: 0 additions & 30 deletions crates/e2e/src/nodes/local_node.rs
@@ -1,40 +1,10 @@
use {
super::TestNode,
chrono::{DateTime, Utc},
ethcontract::{H160, U256},
std::fmt::Debug,
web3::{api::Namespace, helpers::CallFuture, Transport},
};

pub struct Resetter<T> {
test_node_api: TestNodeApi<T>,
snapshot_id: U256,
}

impl<T: Transport> Resetter<T> {
pub async fn new(web3: &web3::Web3<T>) -> Self {
let test_node_api = web3.api::<TestNodeApi<_>>();
let snapshot_id = test_node_api
.snapshot()
.await
.expect("Test network must support evm_snapshot");
Self {
test_node_api,
snapshot_id,
}
}
}

#[async_trait::async_trait(?Send)]
impl<T: Transport> TestNode for Resetter<T> {
async fn reset(&self) {
self.test_node_api
.revert(&self.snapshot_id)
.await
.expect("Test network must support evm_revert");
}
}

#[derive(Debug, Clone)]
pub struct TestNodeApi<T> {
transport: T,
109 changes: 106 additions & 3 deletions crates/e2e/src/nodes/mod.rs
@@ -1,9 +1,112 @@
pub mod forked_node;
pub mod local_node;

/// The default node URL that should be used for e2e tests.
pub const NODE_HOST: &str = "http://127.0.0.1:8545";

#[async_trait::async_trait(?Send)]
pub trait TestNode {
async fn reset(&self);
/// A blockchain node for development purposes. Dropping this type will
/// terminate the node.
pub struct Node {
process: Option<tokio::process::Child>,
}

impl Node {
/// Spawns a new node that is forked from the given URL.
pub async fn forked(fork: impl reqwest::IntoUrl) -> Self {
Self::spawn_process(&["--port", "8545", "--fork-url", fork.as_str()]).await
}

/// Spawns a new local test net with some default parameters.
pub async fn new() -> Self {
Self::spawn_process(&[
"--port",
"8545",
"--gas-price",
"1",
"--gas-limit",
"10000000",
"--base-fee",
"0",
"--balance",
"1000000",
"--chain-id",
"1",
"--timestamp",
"1577836800",
])
.await
}

/// Spawn a new node instance using the list of given arguments.
async fn spawn_process(args: &[&str]) -> Self {
use tokio::io::AsyncBufReadExt as _;

// Allow using some custom logic to spawn `anvil` by setting `ANVIL_COMMAND`.
// For example, if you set up a command that spins up a docker container.
let command = std::env::var("ANVIL_COMMAND").unwrap_or("anvil".to_string());

let mut process = tokio::process::Command::new(command)
.args(args)
.stdout(std::process::Stdio::piped())
.spawn()
.unwrap();

let stdout = process.stdout.take().unwrap();
let (sender, receiver) = tokio::sync::oneshot::channel::<String>();

tokio::task::spawn(async move {
let mut sender = Some(sender);
const NEEDLE: &str = "Listening on ";
let mut reader = tokio::io::BufReader::new(stdout).lines();
while let Some(line) = reader.next_line().await.unwrap() {
tracing::trace!(line);
if let Some(addr) = line.strip_prefix(NEEDLE) {
match sender.take() {
Some(sender) => sender.send(format!("http://{addr}")).unwrap(),
None => tracing::error!(addr, "detected multiple anvil endpoints"),
}
}
}
});

let _url = tokio::time::timeout(tokio::time::Duration::from_secs(1), receiver)
.await
.expect("finding anvil URL timed out")
.unwrap();
Self {
process: Some(process),
}
}

/// Most reliable way to kill the process. If you get the chance to manually
/// clean up the [`Node`], do it, because the [`Drop::drop`]
/// implementation cannot be as reliable due to missing async support.
pub async fn kill(&mut self) {
let mut process = match self.process.take() {
Some(node) => node,
// Somebody already called `Node::kill()`
None => return,
};

if let Err(err) = process.kill().await {
tracing::error!(?err, "failed to kill node process");
}
}
}

impl Drop for Node {
fn drop(&mut self) {
let mut process = match self.process.take() {
Some(process) => process,
// Somebody already called `Node::kill()`
None => return,
};

// This only sends SIGKILL to the process but does not wait for the process to
// actually terminate. But since `anvil` is fairly well behaved that
// should be good enough in many cases.
if let Err(err) = process.start_kill() {
tracing::error!(?err, "failed to kill node process");
}
}
}
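The `ANVIL_COMMAND` hook above means the tests no longer strictly require `anvil` on `PATH`. A hedged sketch of overriding it, assuming a hypothetical wrapper script that execs `anvil` inside a container (in CI you would export the variable instead of setting it in code):

```rust
// Assumption: `./scripts/anvil-docker.sh` is a made-up wrapper that forwards all
// of its arguments to `anvil` running inside a container.
#[tokio::test]
#[ignore]
async fn spawns_node_through_a_wrapper() {
    std::env::set_var("ANVIL_COMMAND", "./scripts/anvil-docker.sh");

    // `spawn_process` now launches the wrapper instead of a plain `anvil` binary.
    let mut node = e2e::nodes::Node::new().await;
    // ... run the test ...
    node.kill().await;
}
```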
52 changes: 31 additions & 21 deletions crates/e2e/src/setup/mod.rs
@@ -5,7 +5,7 @@ pub mod onchain_components;
mod services;

use {
crate::nodes::{forked_node::Forker, local_node::Resetter, TestNode, NODE_HOST},
crate::nodes::{Node, NODE_HOST},
anyhow::{anyhow, Result},
ethcontract::{futures::FutureExt, H160},
shared::ethrpc::{create_test_transport, Web3},
@@ -14,7 +14,7 @@ use {
io::Write,
iter::empty,
panic::{self, AssertUnwindSafe},
sync::Mutex,
sync::{Arc, Mutex},
time::Duration,
},
tempfile::TempPath,
@@ -93,7 +93,7 @@
F: FnOnce(Web3) -> Fut,
Fut: Future<Output = ()>,
{
run(f, empty::<&str>(), None, None).await
run(f, empty::<&str>(), None).await
}

pub async fn run_test_with_extra_filters<F, Fut, T>(
@@ -104,15 +104,15 @@ pub async fn run_test_with_extra_filters<F, Fut, T>(
Fut: Future<Output = ()>,
T: AsRef<str>,
{
run(f, extra_filters, None, None).await
run(f, extra_filters, None).await
}

pub async fn run_forked_test<F, Fut>(f: F, solver_address: H160, fork_url: String)
where
F: FnOnce(Web3) -> Fut,
Fut: Future<Output = ()>,
{
run(f, empty::<&str>(), Some(solver_address), Some(fork_url)).await
run(f, empty::<&str>(), Some((solver_address, fork_url))).await
}

pub async fn run_forked_test_with_extra_filters<F, Fut, T>(
@@ -125,15 +125,11 @@ pub async fn run_forked_test_with_extra_filters<F, Fut, T>(
Fut: Future<Output = ()>,
T: AsRef<str>,
{
run(f, extra_filters, Some(solver_address), Some(fork_url)).await
run(f, extra_filters, Some((solver_address, fork_url))).await
}

async fn run<F, Fut, T>(
f: F,
filters: impl IntoIterator<Item = T>,
solver_address: Option<H160>,
fork_url: Option<String>,
) where
async fn run<F, Fut, T>(f: F, filters: impl IntoIterator<Item = T>, fork: Option<(H160, String)>)
where
F: FnOnce(Web3) -> Fut,
Fut: Future<Output = ()>,
T: AsRef<str>,
@@ -148,25 +144,39 @@ async fn run<F, Fut, T>(
// it but rather in the locked state.
let _lock = NODE_MUTEX.lock();

let node = match &fork {
Some((_, fork)) => Node::forked(fork).await,
None => Node::new().await,
};

let node = Arc::new(Mutex::new(Some(node)));
let node_panic_handle = node.clone();
observe::panic_hook::prepend_panic_handler(Box::new(move |_| {
// Drop node in panic handler because `.catch_unwind()` does not catch all
// panics
let _ = node_panic_handle.lock().unwrap().take();
}));

let http = create_test_transport(NODE_HOST);
let web3 = Web3::new(http);

let test_node: Box<dyn TestNode> =
if let (Some(fork_url), Some(solver_address)) = (fork_url, solver_address) {
Box::new(Forker::new(&web3, solver_address, fork_url).await)
} else {
Box::new(Resetter::new(&web3).await)
};
if let Some((solver, _)) = &fork {
Web3::api::<crate::nodes::forked_node::ForkedNodeApi<_>>(&web3)
.impersonate(solver)
.await
.unwrap();
}

services::clear_database().await;

// Hack: the closure may actually be unwind unsafe; moreover, `catch_unwind`
// does not catch some types of panics. In these cases, the state of the node
// is not restored. This is not considered an issue since this function
// is supposed to be used in a test environment.
let result = AssertUnwindSafe(f(web3.clone())).catch_unwind().await;

test_node.reset().await;
let node = node.lock().unwrap().take();
if let Some(mut node) = node {
node.kill().await;
}
services::clear_database().await;

if let Err(err) = result {
13 changes: 13 additions & 0 deletions crates/observe/src/panic_hook.rs
@@ -17,6 +17,19 @@ pub fn install() {
std::panic::set_hook(Box::new(new_hook));
}

/// Installs a panic handler that executes [`handler`] plus whatever panic
/// handler was already set up.
/// This can be useful to make absolutely sure that resources, such as
/// running processes, get cleaned up on a panic.
pub fn prepend_panic_handler(handler: Box<dyn Fn(&std::panic::PanicInfo) + Send + Sync>) {
let previous_hook = std::panic::take_hook();
let new_hook = move |info: &std::panic::PanicInfo| {
handler(info);
previous_hook(info);
};
std::panic::set_hook(Box::new(new_hook));
}

#[cfg(test)]
mod tests {
use super::*;
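For context, the e2e setup above uses this hook to drop the node even when `catch_unwind` misses a panic. A minimal usage sketch with a made-up resource (the `observe::panic_hook` path comes from this repo; everything else is illustrative):

```rust
use std::sync::{Arc, Mutex};

fn main() {
    // Made-up resource that should be released even if something panics later.
    let resource: Arc<Mutex<Option<String>>> =
        Arc::new(Mutex::new(Some("node handle".to_string())));
    let handle = resource.clone();

    observe::panic_hook::prepend_panic_handler(Box::new(move |_| {
        // Runs before any previously installed hook; clean up here because
        // `catch_unwind` does not catch every kind of panic.
        let _ = handle.lock().unwrap().take();
    }));
}
```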
9 changes: 0 additions & 9 deletions docker-compose.yaml
@@ -33,14 +33,5 @@ services:
source: ./database/sql/
target: /flyway/sql

chain:
image: ghcr.io/foundry-rs/foundry:latest
restart: always
entrypoint: /usr/local/bin/anvil --gas-price 1 --gas-limit 10000000 --base-fee 0 --balance 1000000 --chain-id 1 --timestamp 1577836800
environment:
- ANVIL_IP_ADDR=0.0.0.0
ports:
- 8545:8545

volumes:
postgres:
