diff --git a/.env b/.env index e772ca023..4671ff9f0 100644 --- a/.env +++ b/.env @@ -37,6 +37,8 @@ ESPRESSO_SEQUENCER_GENESIS_FILE=/genesis/demo.toml ESPRESSO_SEQUENCER_L1_PORT=8545 ESPRESSO_SEQUENCER_L1_WS_PORT=8546 ESPRESSO_SEQUENCER_L1_PROVIDER=http://demo-l1-network:${ESPRESSO_SEQUENCER_L1_PORT} +ESPRESSO_NODE_VALIDATOR_PORT=9000 + # Only allow 1 block to be processed for events at a time, simulating a very bad L1 provider. ESPRESSO_SEQUENCER_L1_EVENTS_MAX_BLOCK_RANGE=1 ESPRESSO_SEQUENCER_ETH_MNEMONIC="test test test test test test test test test test test junk" diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e4fcca432..22debaa26 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -81,6 +81,7 @@ jobs: target/release/pub-key target/release/espresso-bridge target/release/marketplace-solver + target/release/node-metrics build-arm: runs-on: buildjet-4vcpu-ubuntu-2204-arm @@ -133,6 +134,7 @@ jobs: target/release/pub-key target/release/espresso-bridge target/release/marketplace-solver + target/release/node-metrics build-dockers: runs-on: ubuntu-latest @@ -153,6 +155,7 @@ jobs: espresso-dev-node-tag: ${{ steps.espresso-dev-node.outputs.tags }} bridge-tag: ${{ steps.bridge.outputs.tags }} marketplace-solver-tag: ${{ steps.marketplace-solver.outputs.tags }} + node-validator-tag: ${{ steps.node-validator.outputs.tags }} steps: - name: Checkout Repository uses: actions/checkout@v4 @@ -272,6 +275,12 @@ jobs: with: images: ghcr.io/espressosystems/espresso-sequencer/marketplace-solver + - name: Generate node-validator metadata + uses: docker/metadata-action@v5 + id: node-validator + with: + images: ghcr.io/espressosystems/espresso-sequencer/node-validator + - name: Build and push sequencer docker uses: docker/build-push-action@v6 with: @@ -421,6 +430,16 @@ jobs: tags: ${{ steps.marketplace-solver.outputs.tags }} labels: ${{ steps.marketplace-solver.outputs.labels }} + - name: Build and push node-validator docker + uses: docker/build-push-action@v6 + with: + context: ./ + file: ./docker/node-validator.Dockerfile + platforms: linux/amd64,linux/arm64 + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.node-validator.outputs.tags }} + labels: ${{ steps.node-validator.outputs.labels }} + test-demo: if: ${{ github.event_name != 'pull_request' }} runs-on: ubuntu-latest @@ -450,6 +469,7 @@ jobs: docker pull ${{ needs.build-dockers.outputs.nasty-client-tag }} docker pull ${{ needs.build-dockers.outputs.bridge-tag }} docker pull ${{ needs.build-dockers.outputs.marketplace-solver-tag }} + docker pull ${{ needs.build-dockers.outputs.node-validator-tag }} - name: Tag new docker images run: | @@ -467,6 +487,7 @@ jobs: docker tag ${{ needs.build-dockers.outputs.nasty-client-tag }} ghcr.io/espressosystems/espresso-sequencer/nasty-client:main docker tag ${{ needs.build-dockers.outputs.bridge-tag }} ghcr.io/espressosystems/espresso-sequencer/bridge:main docker tag ${{ needs.build-dockers.outputs.marketplace-solver-tag }} ghcr.io/espressosystems/espresso-sequencer/marketplace-solver:main + docker tag ${{ needs.build-dockers.outputs.node-validator-tag }} ghcr.io/espressosystems/espresso-sequencer/node-validator:main - name: Test docker demo run: | diff --git a/.github/workflows/build_static.yml b/.github/workflows/build_static.yml index 73187114b..8edfb1550 100644 --- a/.github/workflows/build_static.yml +++ b/.github/workflows/build_static.yml @@ -92,6 +92,7 @@ jobs: ${{ env.CARGO_TARGET_DIR }}/${{ env.TARGET_TRIPLET }}/release/espresso-bridge ${{ 
env.CARGO_TARGET_DIR }}/${{ env.TARGET_TRIPLET }}/release/espresso-dev-node ${{ env.CARGO_TARGET_DIR }}/${{ env.TARGET_TRIPLET }}/release/marketplace-solver + ${{ env.CARGO_TARGET_DIR }}/${{ env.TARGET_TRIPLET }}/release/node-metrics static-dockers: runs-on: ubuntu-latest @@ -216,6 +217,13 @@ jobs: images: ghcr.io/espressosystems/espresso-sequencer/marketplace-solver flavor: suffix=musl + - name: Generate node-validator metadata + uses: docker/metadata-action@v5 + id: node-validator + with: + images: ghcr.io/espressosystems/espresso-sequencer/node-validator + flavor: suffix=musl + - name: Build and push sequencer docker uses: docker/build-push-action@v6 with: @@ -344,4 +352,14 @@ jobs: platforms: linux/amd64,linux/arm64 push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.marketplace-solver.outputs.tags }} - labels: ${{ steps.marketplace-solver.outputs.labels }} \ No newline at end of file + labels: ${{ steps.marketplace-solver.outputs.labels }} + + - name: Build and push node-validator docker + uses: docker/build-push-action@v6 + with: + context: ./ + file: ./docker/node-validator.Dockerfile + platforms: linux/amd64,linux/arm64 + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.node-validator.outputs.tags }} + labels: ${{ steps.node-validator.outputs.labels }} \ No newline at end of file diff --git a/.typos.toml b/.typos.toml index 53cdcf235..37ee97ca2 100644 --- a/.typos.toml +++ b/.typos.toml @@ -4,4 +4,5 @@ extend-exclude = [ "doc/*.svg", "contracts/lib", "contract-bindings", + "node-metrics/src/api/node_validator/v0/example_prometheus_metrics_output.txt", ] diff --git a/Cargo.lock b/Cargo.lock index f8b2e637e..cc8aa6773 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1683,6 +1683,12 @@ dependencies = [ "inout", ] +[[package]] +name = "circular-buffer" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da987586004ae7c43b7df5e3f7693775068522e1086f8d9b2d74c778a0f43313" + [[package]] name = "clap" version = "4.5.13" @@ -6448,6 +6454,38 @@ dependencies = [ "libc", ] +[[package]] +name = "node-metrics" +version = "0.1.0" +dependencies = [ + "async-compatibility-layer", + "async-std", + "async-trait", + "bincode", + "bitvec", + "circular-buffer", + "clap", + "espresso-types", + "futures", + "hotshot", + "hotshot-query-service", + "hotshot-stake-table", + "hotshot-testing", + "hotshot-types", + "prometheus-parse", + "reqwest 0.12.5", + "serde", + "serde_json", + "surf-disco", + "tagged-base64", + "tide-disco", + "time 0.3.36", + "toml", + "tracing", + "url", + "vbs", +] + [[package]] name = "nom" version = "7.1.3" @@ -7307,6 +7345,18 @@ dependencies = [ "syn 2.0.72", ] +[[package]] +name = "prometheus-parse" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "811031bea65e5a401fb2e1f37d802cca6601e204ac463809a3189352d13b78a5" +dependencies = [ + "chrono", + "itertools 0.12.1", + "once_cell", + "regex", +] + [[package]] name = "proptest" version = "1.5.0" diff --git a/Cargo.toml b/Cargo.toml index d215f59dd..4ccf61184 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ members = [ "contracts/rust/gen-vk-contract", "hotshot-state-prover", "marketplace-solver", + "node-metrics", "sequencer", "types", "utils", @@ -31,8 +32,8 @@ ark-ff = "0.4" ark-poly = "0.4" ark-serialize = "0.4" ark-srs = "0.3.1" -async-compatibility-layer = { version = "1.1", default-features = false, features = [ - "logging-utils", +async-compatibility-layer = { version = "1.2.1", default-features = 
false, features = [ + "logging-utils", ] } async-once-cell = "0.5" async-std = { version = "1.12.0", features = ["attributes", "tokio1"] } @@ -40,7 +41,9 @@ async-trait = "0.1" base64 = "0.22" base64-bytes = "0.1" bincode = "1.3.3" +bitvec = "1.0.1" blake3 = "1.5" +circular-buffer = "0.1.7" clap = { version = "4.4", features = ["derive", "env", "string"] } cld = "0.5" derive_more = "0.99.17" @@ -133,3 +136,4 @@ paste = "1.0" rand = "0.8.5" time = "0.3" trait-set = "0.3.0" + diff --git a/builder/src/bin/permissioned-builder.rs b/builder/src/bin/permissioned-builder.rs index cd938bfb5..7936a0a28 100644 --- a/builder/src/bin/permissioned-builder.rs +++ b/builder/src/bin/permissioned-builder.rs @@ -256,6 +256,7 @@ async fn main() -> anyhow::Result<()> { private_staking_key: private_staking_key.clone(), private_state_key, state_peers: opt.state_peers, + public_api_url: None, config_peers: None, catchup_backoff: Default::default(), }; diff --git a/docker-compose.yaml b/docker-compose.yaml index 381bf3900..aff22c3bb 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -236,6 +236,16 @@ services: - ESPRESSO_SEQUENCER_LIBP2P_BIND_ADDRESS=0.0.0.0:$ESPRESSO_DEMO_SEQUENCER_LIBP2P_PORT_0 - ESPRESSO_SEQUENCER_LIBP2P_ADVERTISE_ADDRESS=sequencer0:$ESPRESSO_DEMO_SEQUENCER_LIBP2P_PORT_0 - ESPRESSO_SEQUENCER_IS_DA=true + - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer0 + - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000000 + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_WEBSITE=https://www.espressosys.com/ + - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=Linux 5.15.153.1 + - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local + - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=US + - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=40.7128 + - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=-74.0060 + - ESPRESSO_SEQUENCER_PUBLIC_API_URL=http://sequencer0:$ESPRESSO_SEQUENCER_API_PORT/ - RUST_LOG - RUST_LOG_FORMAT - ASYNC_STD_THREAD_COUNT @@ -282,6 +292,16 @@ services: - ESPRESSO_SEQUENCER_LIBP2P_BIND_ADDRESS=0.0.0.0:$ESPRESSO_DEMO_SEQUENCER_LIBP2P_PORT_1 - ESPRESSO_SEQUENCER_LIBP2P_ADVERTISE_ADDRESS=sequencer1:$ESPRESSO_DEMO_SEQUENCER_LIBP2P_PORT_1 - ESPRESSO_SEQUENCER_IS_DA=true + - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer1 + - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000001 + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_WEBSITE=https://www.espressosys.com/ + - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=Darwin 23.5.0 + - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local + - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=GR + - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=39.0742 + - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=21.8243 + - ESPRESSO_SEQUENCER_PUBLIC_API_URL=http://sequencer1:$ESPRESSO_SEQUENCER_API_PORT/ - RUST_LOG - RUST_LOG_FORMAT - ASYNC_STD_THREAD_COUNT @@ -324,6 +344,16 @@ services: - ESPRESSO_SEQUENCER_LIBP2P_BIND_ADDRESS=0.0.0.0:$ESPRESSO_DEMO_SEQUENCER_LIBP2P_PORT_2 - ESPRESSO_SEQUENCER_LIBP2P_ADVERTISE_ADDRESS=sequencer2:$ESPRESSO_DEMO_SEQUENCER_LIBP2P_PORT_2 - ESPRESSO_SEQUENCER_IS_DA=true + - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer2 + - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000002 + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_WEBSITE=https://www.espressosys.com/ + - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=Darwin 23.5.0 + - 
ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local + - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=CN + - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=35.8617 + - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=104.1954 + - ESPRESSO_SEQUENCER_PUBLIC_API_URL=http://sequencer2:$ESPRESSO_SEQUENCER_API_PORT/ - RUST_LOG - RUST_LOG_FORMAT - ASYNC_STD_THREAD_COUNT @@ -362,6 +392,17 @@ services: - ESPRESSO_SEQUENCER_PRIVATE_STATE_KEY=$ESPRESSO_DEMO_SEQUENCER_STATE_PRIVATE_KEY_3 - ESPRESSO_SEQUENCER_LIBP2P_BIND_ADDRESS=0.0.0.0:$ESPRESSO_DEMO_SEQUENCER_LIBP2P_PORT_3 - ESPRESSO_SEQUENCER_LIBP2P_ADVERTISE_ADDRESS=sequencer3:$ESPRESSO_DEMO_SEQUENCER_LIBP2P_PORT_3 + - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer3 + - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000003 + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_WEBSITE=https://www.espressosys.com/ + - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=Microsoft Windows NT 10.0.22621.0 + - ESPRESSO_SEQUENCER_IDENTITY_E=espresso-sequencer@0.1.0 + - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local + - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=CN + - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=35.8617 + - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=104.1954 + - ESPRESSO_SEQUENCER_PUBLIC_API_URL=http://sequencer3:$ESPRESSO_SEQUENCER_API_PORT/ - RUST_LOG - RUST_LOG_FORMAT - ASYNC_STD_THREAD_COUNT @@ -400,6 +441,16 @@ services: - ESPRESSO_SEQUENCER_PRIVATE_STATE_KEY=$ESPRESSO_DEMO_SEQUENCER_STATE_PRIVATE_KEY_4 - ESPRESSO_SEQUENCER_LIBP2P_BIND_ADDRESS=0.0.0.0:$ESPRESSO_DEMO_SEQUENCER_LIBP2P_PORT_4 - ESPRESSO_SEQUENCER_LIBP2P_ADVERTISE_ADDRESS=sequencer4:$ESPRESSO_DEMO_SEQUENCER_LIBP2P_PORT_4 + - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer4 + - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000004 + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_WEBSITE=https://www.espressosys.com/ + - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=TempleOS 5.03 + - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local + - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=AU + - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=-25.2744 + - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=133.7751 + - ESPRESSO_SEQUENCER_PUBLIC_API_URL=http://sequencer4:$ESPRESSO_SEQUENCER_API_PORT/ - RUST_LOG - RUST_LOG_FORMAT - ASYNC_STD_THREAD_COUNT @@ -542,6 +593,26 @@ services: solver-db: condition: service_healthy + node-validator: + image: ghcr.io/espressosystems/espresso-sequencer/node-validator:main + ports: + - "$ESPRESSO_NODE_VALIDATOR_PORT:$ESPRESSO_NODE_VALIDATOR_PORT" + environment: + - ESPRESSO_NODE_VALIDATOR_STAKE_TABLE_SOURCE_BASE_URL=http://sequencer0:$ESPRESSO_SEQUENCER_API_PORT/v0/ + - ESPRESSO_NODE_VALIDATOR_LEAF_STREAM_SOURCE_BASE_URL=http://sequencer0:$ESPRESSO_SEQUENCER_API_PORT/v0/ + - ESPRESSO_NODE_VALIDATOR_INITIAL_NODE_PUBLIC_BASE_URLS=http://sequencer0:$ESPRESSO_SEQUENCER_API_PORT,http://sequencer1:$ESPRESSO_SEQUENCER1_API_PORT,http://sequencer2:$ESPRESSO_SEQUENCER2_API_PORT,http://sequencer3:$ESPRESSO_SEQUENCER3_API_PORT,http://sequencer4:$ESPRESSO_SEQUENCER4_API_PORT + depends_on: + sequencer0: + condition: service_healthy + sequencer1: + condition: service_healthy + sequencer2: + condition: service_healthy + sequencer3: + condition: service_healthy + sequencer4: + condition: service_healthy + sequencer-db-0: image: postgres user: root diff --git a/docker/node-validator.Dockerfile b/docker/node-validator.Dockerfile new file mode 100644 index 000000000..5a7690f65 --- 
/dev/null
+++ b/docker/node-validator.Dockerfile
@@ -0,0 +1,18 @@
+FROM ubuntu:jammy
+
+ARG TARGETARCH
+
+RUN apt-get update \
+    && apt-get install -y curl libcurl4 wait-for-it tini \
+    && rm -rf /var/lib/apt/lists/*
+ENTRYPOINT ["tini", "--"]
+
+COPY target/$TARGETARCH/release/node-metrics /bin/node-metrics
+RUN chmod +x /bin/node-metrics
+
+# Run a web server on this port by default. Port can be overridden by the container orchestrator.
+ENV ESPRESSO_NODE_VALIDATOR_PORT=80
+
+CMD ["/bin/node-metrics"]
+HEALTHCHECK --interval=1s --timeout=1s --retries=100 CMD curl --fail http://localhost:${ESPRESSO_NODE_VALIDATOR_PORT}/healthcheck || exit 1
+EXPOSE ${ESPRESSO_NODE_VALIDATOR_PORT}
diff --git a/node-metrics/Cargo.toml b/node-metrics/Cargo.toml
new file mode 100644
index 000000000..51afc0430
--- /dev/null
+++ b/node-metrics/Cargo.toml
@@ -0,0 +1,39 @@
+[package]
+name = "node-metrics"
+description = "A library for collecting, recording, and distributing information about the Espresso blockchain network"
+version = { workspace = true }
+authors = { workspace = true }
+edition = { workspace = true }
+
+[features]
+testing = ["serde_json", "espresso-types/testing"]
+
+[dependencies]
+async-compatibility-layer = { workspace = true }
+async-std = { workspace = true }
+async-trait = { workspace = true }
+bincode = { workspace = true }
+bitvec = { workspace = true }
+circular-buffer = { workspace = true }
+clap = { workspace = true }
+espresso-types = { path = "../types" }
+futures = { workspace = true }
+hotshot = { workspace = true }
+hotshot-query-service = { workspace = true }
+hotshot-stake-table = { workspace = true }
+hotshot-types = { workspace = true }
+prometheus-parse = { version = "^0.2.5" }
+reqwest = { workspace = true }
+serde = { workspace = true }
+serde_json = { version = "^1.0.113", optional = true }
+surf-disco = { workspace = true }
+tagged-base64 = { workspace = true }
+tide-disco = { version = "0.9.0" }
+time = { workspace = true }
+toml = { workspace = true }
+tracing = { workspace = true }
+url = { workspace = true }
+vbs = { workspace = true }
+
+# Dependencies for feature `testing`
+hotshot-testing = { workspace = true, optional = true }
diff --git a/node-metrics/src/api/mod.rs b/node-metrics/src/api/mod.rs
new file mode 100644
index 000000000..539d436a9
--- /dev/null
+++ b/node-metrics/src/api/mod.rs
@@ -0,0 +1 @@
+pub mod node_validator;
diff --git a/node-metrics/src/api/node_validator/mod.rs b/node-metrics/src/api/node_validator/mod.rs
new file mode 100644
index 000000000..2d24cd45f
--- /dev/null
+++ b/node-metrics/src/api/node_validator/mod.rs
@@ -0,0 +1 @@
+pub mod v0;
diff --git a/node-metrics/src/api/node_validator/v0/cdn/mod.rs b/node-metrics/src/api/node_validator/v0/cdn/mod.rs
new file mode 100644
index 000000000..143170c79
--- /dev/null
+++ b/node-metrics/src/api/node_validator/v0/cdn/mod.rs
@@ -0,0 +1,594 @@
+use crate::api::node_validator::v0::create_node_validator_api::ExternalMessage;
+use async_std::task::JoinHandle;
+use espresso_types::{PubKey, SeqTypes};
+use futures::{channel::mpsc::SendError, Sink, SinkExt};
+use hotshot::{
+    traits::NetworkError,
+    types::{Message, SignatureKey},
+};
+use hotshot_types::{
+    message::{MessageKind, VersionedMessage},
+    traits::{
+        network::{BroadcastDelay, ConnectedNetwork, Topic},
+        node_implementation::NodeType,
+    },
+};
+use url::Url;
+
+/// ConnectedNetworkConsumer represents a trait that splits up a portion of
+/// the ConnectedNetwork trait, so that the consumer only needs to be aware of
+/// the `wait_for_ready` and `recv_msgs` functions.
+#[async_trait::async_trait]
+pub trait ConnectedNetworkConsumer<K> {
+    /// [wait_for_ready] will not return until the network is ready to be
+    /// utilized.
+    async fn wait_for_ready(&self);
+
+    /// [recv_msgs] will return a list of messages that have been received from
+    /// the network.
+    ///
+    /// ## Errors
+    ///
+    /// All errors are expected to be network related.
+    async fn recv_msgs(&self) -> Result<Vec<Vec<u8>>, NetworkError>;
+}
+
+#[async_trait::async_trait]
+impl<K, N> ConnectedNetworkConsumer<K> for N
+where
+    K: SignatureKey + Send + Sync + 'static,
+    N: ConnectedNetwork<K> + 'static,
+{
+    async fn wait_for_ready(&self) {
+        <N as ConnectedNetwork<K>>::wait_for_ready(self).await
+    }
+
+    async fn recv_msgs(&self) -> Result<Vec<Vec<u8>>, NetworkError> {
+        let cloned_self = self.clone();
+        <N as ConnectedNetwork<K>>::recv_msgs(&cloned_self).await
+    }
+}
+
+/// CdnReceiveMessagesTask represents a task that is responsible for receiving
+/// messages from the CDN network and processing them.
+/// This task is primarily concerned with recording responses to RollCall
+/// requests, and forwarding any discovered public API URLs to the URL sender.
+pub struct CdnReceiveMessagesTask {
+    task_handle: Option<JoinHandle<()>>,
+}
+
+impl CdnReceiveMessagesTask {
+    /// Creates a new `CdnReceiveMessagesTask` with the given network and
+    /// URL sender. Calling this function will create an async task that
+    /// will begin executing immediately. The handle for the task will
+    /// be in the returned structure.
+    pub fn new<N, K>(network: N, url_sender: K) -> Self
+    where
+        N: ConnectedNetworkConsumer<<SeqTypes as NodeType>::SignatureKey> + Send + 'static,
+        K: Sink<Url, Error = SendError> + Clone + Send + Unpin + 'static,
+    {
+        let task_handle = async_std::task::spawn(Self::process_cdn_messages(network, url_sender));
+        Self {
+            task_handle: Some(task_handle),
+        }
+    }
+
+    /// [process_cdn_messages] is the function that will begin consuming
+    /// messages off of the CDN, and start handling them.
+    ///
+    /// At the moment, this only looks for and recognizes
+    /// [MessageKind::External] messages, and attempts to decode
+    /// [ExternalMessage] from those contained pieces of data, though in the
+    /// future this may be expanded to handle other message types.
+    async fn process_cdn_messages<N, K>(network: N, url_sender: K)
+    where
+        N: ConnectedNetworkConsumer<<SeqTypes as NodeType>::SignatureKey> + Send + 'static,
+        K: Sink<Url, Error = SendError> + Clone + Send + Unpin + 'static,
+    {
+        network.wait_for_ready().await;
+        let mut url_sender = url_sender;
+
+        loop {
+            let messages_result = network.recv_msgs().await;
+            let messages = match messages_result {
+                Ok(message) => message,
+                Err(err) => {
+                    tracing::error!("error receiving message: {:?}", err);
+                    continue;
+                }
+            };
+
+            for message in messages {
+                // We want to try and decode this message.
+                let message_deserialize_result = Message::<SeqTypes>::deserialize(&message, &None);
+
+                let message = match message_deserialize_result {
+                    Ok(message) => message,
+                    Err(err) => {
+                        tracing::error!("error deserializing message: {:?}", err);
+                        continue;
+                    }
+                };
+
+                let external_message_deserialize_result = match message.kind {
+                    MessageKind::External(external_message) => {
+                        bincode::deserialize::<ExternalMessage>(&external_message)
+                    }
+                    _ => {
+                        tracing::error!("unexpected message kind: {:?}", message);
+                        continue;
+                    }
+                };
+
+                let external_message = match external_message_deserialize_result {
+                    Ok(external_message) => external_message,
+                    Err(err) => {
+                        tracing::error!("error deserializing message: {:?}", err);
+                        continue;
+                    }
+                };
+
+                match external_message {
+                    ExternalMessage::RollCallResponse(roll_call_info) => {
+                        let public_api_url = roll_call_info.public_api_url;
+
+                        // We have a public API URL, so we can process it.
+                        if let Err(err) = url_sender.send(public_api_url).await {
+                            tracing::error!("error sending public api url: {:?}", err);
+                            return;
+                        }
+                    }
+
+                    _ => {
+                        // We're not concerned about other message types
+                    }
+                }
+            }
+        }
+    }
+}
+
+impl Drop for CdnReceiveMessagesTask {
+    fn drop(&mut self) {
+        if let Some(task_handle) = self.task_handle.take() {
+            async_std::task::block_on(task_handle.cancel());
+        }
+    }
+}
+
+/// ConnectedNetworkPublisher represents a trait that splits up a portion of
+/// the ConnectedNetwork trait, so that the publisher only needs to be aware of
+/// the `wait_for_ready` and `broadcast_message` functions.
+#[async_trait::async_trait]
+pub trait ConnectedNetworkPublisher<K> {
+    /// [wait_for_ready] will not return until the network is ready to be
+    /// utilized.
+    async fn wait_for_ready(&self);
+
+    /// [broadcast_message] will broadcast the given message to some subset of
+    /// nodes in the network based on the given topic.
+    ///
+    /// This is a blocking operation.
+    async fn broadcast_message(
+        &self,
+        message: Vec<u8>,
+        topic: Topic,
+        broadcast_delay: BroadcastDelay,
+    ) -> Result<(), NetworkError>;
+}
+
+#[async_trait::async_trait]
+impl<K, N> ConnectedNetworkPublisher<K> for N
+where
+    K: SignatureKey + Send + Sync + 'static,
+    N: ConnectedNetwork<K> + 'static,
+{
+    async fn wait_for_ready(&self) {
+        <N as ConnectedNetwork<K>>::wait_for_ready(self).await
+    }
+
+    async fn broadcast_message(
+        &self,
+        message: Vec<u8>,
+        topic: Topic,
+        broadcast_delay: BroadcastDelay,
+    ) -> Result<(), NetworkError> {
+        <N as ConnectedNetwork<K>>::broadcast_message(self, message, topic, broadcast_delay).await
+    }
+}
+
+/// BroadcastRollCallTask represents a task that is responsible for broadcasting
+/// a RollCallRequest to the CDN network.
+pub struct BroadcastRollCallTask {
+    task_handle: Option<JoinHandle<()>>,
+}
+
+impl BroadcastRollCallTask {
+    /// Creates a new `BroadcastRollCallTask` with the given network and
+    /// public key. Calling this function will create an async task that
+    /// will begin executing immediately. The handle for the task will
+    /// be in the returned structure.
+    ///
+    /// This task only performs one action, and then returns. It is not
+    /// long-lived.
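+    ///
+    /// A minimal usage sketch (`network` stands in for any
+    /// [ConnectedNetworkPublisher] implementation and `public_key` for this
+    /// process's own key; both are placeholder names):
+    ///
+    /// ```ignore
+    /// let _roll_call = BroadcastRollCallTask::new(network, public_key);
+    /// // Dropping the returned handle cancels the broadcast if it has not
+    /// // yet completed; otherwise the task has already finished on its own.
+    /// ```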
+ pub fn new(network: N, public_key: PubKey) -> Self + where + N: ConnectedNetworkPublisher<::SignatureKey> + Send + 'static, + { + let task_handle = async_std::task::spawn(Self::broadcast_roll_call(network, public_key)); + Self { + task_handle: Some(task_handle), + } + } + + /// [broadcast_roll_call] is the function that will broadcast a + /// RollCallRequest to the CDN network in order to request responses from + /// the rest of the network participants, so we can collect the public API + /// URLs in the message consuming task. + async fn broadcast_roll_call(network: N, public_key: PubKey) + where + N: ConnectedNetworkPublisher<::SignatureKey> + Send + 'static, + { + network.wait_for_ready().await; + + // We want to send the Roll Call Request + let rollcall_request = ExternalMessage::RollCallRequest(public_key); + let rollcall_request_serialized = match bincode::serialize(&rollcall_request) { + Ok(rollcall_request_serialized) => rollcall_request_serialized, + Err(err) => { + tracing::error!("error serializing rollcall request: {:?}", err); + return; + } + }; + + let hotshot_message = Message:: { + sender: public_key, + kind: MessageKind::External(rollcall_request_serialized), + }; + + let hotshot_message_serialized = match hotshot_message.serialize(&None) { + Ok(hotshot_message_serialized) => hotshot_message_serialized, + Err(err) => { + tracing::error!("error serializing hotshot message: {:?}", err); + return; + } + }; + + let broadcast_result = network + .broadcast_message( + hotshot_message_serialized, + Topic::Global, + BroadcastDelay::None, + ) + .await; + if let Err(err) = broadcast_result { + tracing::error!("error broadcasting rollcall request: {:?}", err); + } + + tracing::info!("broadcast roll call request completed"); + } +} + +impl Drop for BroadcastRollCallTask { + fn drop(&mut self) { + if let Some(task_handle) = self.task_handle.take() { + async_std::task::block_on(task_handle.cancel()); + } + } +} + +#[cfg(test)] +mod test { + use super::{BroadcastRollCallTask, ConnectedNetworkConsumer, ConnectedNetworkPublisher}; + use crate::api::node_validator::v0::create_node_validator_api::ExternalMessage; + use crate::api::node_validator::v0::{ + cdn::CdnReceiveMessagesTask, create_node_validator_api::RollCallInfo, + }; + use async_std::future::TimeoutError; + use async_std::prelude::FutureExt; + use core::panic; + use espresso_types::SeqTypes; + use futures::channel::mpsc::Sender; + use futures::SinkExt; + use futures::{ + channel::mpsc::{self}, + StreamExt, + }; + use hotshot::types::SignatureKey; + use hotshot::{ + traits::NetworkError, + types::{BLSPubKey, Message}, + }; + use hotshot_types::message::{DataMessage, MessageKind, VersionedMessage}; + use hotshot_types::traits::network::{BroadcastDelay, ResponseMessage}; + use std::time::Duration; + use url::Url; + + /// [TestConnectedNetworkConsumer] is a test implementation of the + /// [ConnectedNetworkConsumer] trait that allows for the simulation of + /// network messages being received. + struct TestConnectedNetworkConsumer(Result>, NetworkError>); + + /// [clone_result] is a helper function that clones the result of a + /// network message receive operation. This is used to ensure that the + /// original result is not consumed by the task. 
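+    ///
+    /// Note that only the error variants the tests actually exercise are
+    /// reproduced here; any other variant is treated as a test bug and
+    /// panics.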
+ fn clone_result( + result: &Result>, NetworkError>, + ) -> Result>, NetworkError> { + match result { + Ok(messages) => Ok(messages.clone()), + Err(err) => match err { + NetworkError::ChannelSend => Err(NetworkError::ChannelSend), + _ => panic!("unexpected network error"), + }, + } + } + + #[async_trait::async_trait] + impl ConnectedNetworkConsumer for TestConnectedNetworkConsumer { + async fn wait_for_ready(&self) {} + + async fn recv_msgs(&self) -> Result>, NetworkError> { + async_std::task::sleep(Duration::from_millis(5)).await; + clone_result(&self.0) + } + } + + /// [test_cdn_receive_messages_task] is a test that verifies that the + /// an expected External Message can be encoded, decoded, and sent to the + /// url_sender appropriately. + #[async_std::test] + async fn test_cdn_receive_messages_task() { + let test_hotshot_message_serialized = { + let test_url = Url::parse("http://localhost:8080/").unwrap(); + + let test_external_message = ExternalMessage::RollCallResponse(RollCallInfo { + public_api_url: test_url.clone(), + }); + + let external_message_encoded = bincode::serialize(&test_external_message).unwrap(); + + let test_message = Message:: { + sender: BLSPubKey::generated_from_seed_indexed([0; 32], 0).0, + kind: MessageKind::External(external_message_encoded), + }; + + hotshot_types::message::VersionedMessage::serialize(&test_message, &None).unwrap() + }; + + let (url_sender, url_receiver) = mpsc::channel(1); + let task = CdnReceiveMessagesTask::new( + TestConnectedNetworkConsumer(Ok(vec![test_hotshot_message_serialized])), + url_sender, + ); + + let mut url_receiver = url_receiver; + let next_message = url_receiver + .next() + .timeout(Duration::from_millis(50)) + .await + .unwrap() + .unwrap(); + + assert_eq!(next_message, Url::parse("http://localhost:8080/").unwrap()); + + drop(task); + } + + /// [test_cdn_receive_messages_task_fails_receiving_message] is a test that + /// verifies that the task does not close, nor send a url, when it + /// encounters an error from the recv_msgs function. + #[async_std::test] + async fn test_cdn_receive_messages_task_fails_receiving_message() { + let (url_sender, url_receiver) = mpsc::channel(1); + let task = CdnReceiveMessagesTask::new( + TestConnectedNetworkConsumer(Err(NetworkError::ChannelSend)), + url_sender, + ); + + let mut url_receiver = url_receiver; + // The task should not panic when it fails to receive a message. + let receive_result = url_receiver.next().timeout(Duration::from_millis(50)).await; + + if let Err(TimeoutError { .. }) = receive_result { + // This is expected + } else { + panic!("receive did not timeout"); + } + + drop(task); + } + + /// [test_cdn_receive_messages_task_fails_decoding_hotshot_message] is a + /// test that verifies that the task does not close, nor send a url, when it + /// encounters an error from the deserialization of the hotshot message. + #[async_std::test] + async fn test_cdn_receive_messages_task_fails_decoding_hotshot_message() { + let (url_sender, url_receiver) = mpsc::channel(1); + let task = CdnReceiveMessagesTask::new( + TestConnectedNetworkConsumer(Ok(vec![vec![0]])), + url_sender, + ); + + let mut url_receiver = url_receiver; + // The task should not panic when it fails to receive a message. + let receive_result = url_receiver.next().timeout(Duration::from_millis(50)).await; + + if let Err(TimeoutError { .. 
}) = receive_result { + // This is expected + } else { + panic!("receive did not timeout"); + } + + drop(task); + } + + /// [test_cdn_receive_messages_task_fails_unexpected_hotshot_message_variant] + /// is a test that verifies that the task does not close, nor send a url, when + /// it encounters a hotshot message that was not an External message. + /// + /// This really shouldn't happen in practice. + #[async_std::test] + async fn test_cdn_receive_messages_task_fails_unexpected_hotshot_message_variant() { + let (url_sender, url_receiver) = mpsc::channel(1); + let bytes = VersionedMessage::serialize( + &Message:: { + sender: BLSPubKey::generated_from_seed_indexed([0; 32], 0).0, + kind: MessageKind::Data(DataMessage::DataResponse(ResponseMessage::NotFound)), + }, + &None, + ) + .unwrap(); + + let task = + CdnReceiveMessagesTask::new(TestConnectedNetworkConsumer(Ok(vec![bytes])), url_sender); + + let mut url_receiver = url_receiver; + // The task should not panic when it fails to receive a message. + let receive_result = url_receiver.next().timeout(Duration::from_millis(50)).await; + + if let Err(TimeoutError { .. }) = receive_result { + // This is expected + } else { + panic!("receive did not timeout"); + } + + drop(task); + } + + /// [test_cdn_receive_messages_task_fails_decoding_external_message] is a + /// test that verifies that the task does not close, nor send a url, when + /// it encounters an error from the deserialization of the external message. + #[async_std::test] + async fn test_cdn_receive_messages_task_fails_decoding_external_message() { + let (url_sender, url_receiver) = mpsc::channel(1); + let bytes = VersionedMessage::serialize( + &Message:: { + sender: BLSPubKey::generated_from_seed_indexed([0; 32], 0).0, + kind: MessageKind::External(vec![0]), + }, + &None, + ) + .unwrap(); + + let task = + CdnReceiveMessagesTask::new(TestConnectedNetworkConsumer(Ok(vec![bytes])), url_sender); + + let mut url_receiver = url_receiver; + // The task should not panic when it fails to receive a message. + let receive_result = url_receiver.next().timeout(Duration::from_millis(50)).await; + + if let Err(TimeoutError { .. }) = receive_result { + // This is expected + } else { + panic!("receive did not timeout"); + } + + drop(task); + } + + /// [test_cdn_receive_messages_tasks_exits_when_url_receiver_closed] is a + /// test that verifies that the task exits when the url receiver is closed. + /// + /// Without being able to send urls to the url_sender, the task doesn't + /// really have a point in existing. 
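+    ///
+    /// Exit is detected by taking the internal `task_handle` and awaiting it
+    /// with a timeout: the handle resolving to `()` within the timeout means
+    /// the task returned on its own.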
+    #[async_std::test]
+    async fn test_cdn_receive_messages_tasks_exits_when_url_receiver_closed() {
+        let (url_sender, url_receiver) = mpsc::channel(1);
+
+        let test_hotshot_message_serialized = {
+            let test_url = Url::parse("http://localhost:8080/").unwrap();
+
+            let test_external_message = ExternalMessage::RollCallResponse(RollCallInfo {
+                public_api_url: test_url.clone(),
+            });
+
+            let external_message_encoded = bincode::serialize(&test_external_message).unwrap();
+
+            let test_message = Message::<SeqTypes> {
+                sender: BLSPubKey::generated_from_seed_indexed([0; 32], 0).0,
+                kind: MessageKind::External(external_message_encoded),
+            };
+
+            hotshot_types::message::VersionedMessage::serialize(&test_message, &None).unwrap()
+        };
+        drop(url_receiver);
+
+        let mut task = CdnReceiveMessagesTask::new(
+            TestConnectedNetworkConsumer(Ok(vec![test_hotshot_message_serialized])),
+            url_sender.clone(),
+        );
+
+        let task_handle = task.task_handle.take();
+
+        if let Some(task_handle) = task_handle {
+            assert_eq!(task_handle.timeout(Duration::from_millis(50)).await, Ok(()));
+        }
+    }
+
+    /// [TestConnectedNetworkPublisher] is a test implementation of the
+    /// [ConnectedNetworkPublisher] trait that allows for the simulation of
+    /// network messages being sent.
+    struct TestConnectedNetworkPublisher(Sender<Vec<u8>>);
+
+    #[async_trait::async_trait]
+    impl ConnectedNetworkPublisher<BLSPubKey> for TestConnectedNetworkPublisher {
+        async fn wait_for_ready(&self) {}
+
+        async fn broadcast_message(
+            &self,
+            message: Vec<u8>,
+            _topic: hotshot_types::traits::network::Topic,
+            _broadcast_delay: BroadcastDelay,
+        ) -> Result<(), NetworkError> {
+            let mut sender = self.0.clone();
+            let send_result = sender.send(message).await;
+            send_result.map_err(|_| NetworkError::ChannelSend)
+        }
+    }
+
+    /// [test_cdn_broadcast_roll_call_task] is a test that verifies that the
+    /// task broadcasts a RollCallRequest message to the network. It also
+    /// verifies that the task is short-lived, as it does not need to persist
+    /// beyond its initial request.
+ #[async_std::test] + async fn test_cdn_broadcast_roll_call_task() { + let (message_sender, message_receiver) = mpsc::channel(1); + + let task = BroadcastRollCallTask::new( + TestConnectedNetworkPublisher(message_sender), + BLSPubKey::generated_from_seed_indexed([0; 32], 0).0, + ); + + let mut message_receiver = message_receiver; + let next_message = message_receiver.next().await.unwrap(); + let next_message = + as VersionedMessage>::deserialize(&next_message, &None) + .unwrap(); + + let external_message = match next_message.kind { + MessageKind::External(external_message) => external_message, + _ => panic!("unexpected message kind"), + }; + + let external_message = bincode::deserialize::(&external_message).unwrap(); + + match external_message { + ExternalMessage::RollCallRequest(public_key) => { + assert_eq!( + public_key, + BLSPubKey::generated_from_seed_indexed([0; 32], 0).0 + ); + } + _ => panic!("unexpected external message"), + } + + let mut task = task; + let task_handle = task.task_handle.take(); + + if let Some(task_handle) = task_handle { + assert_eq!(task_handle.timeout(Duration::from_millis(50)).await, Ok(())); + } + } +} diff --git a/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs b/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs new file mode 100644 index 000000000..210dc805b --- /dev/null +++ b/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs @@ -0,0 +1,470 @@ +use std::sync::Arc; + +use super::{get_stake_table_from_sequencer, ProcessNodeIdentityUrlStreamTask}; +use crate::service::{ + client_id::ClientId, + client_message::InternalClientMessage, + client_state::{ + ClientThreadState, InternalClientMessageProcessingTask, + ProcessDistributeBlockDetailHandlingTask, ProcessDistributeNodeIdentityHandlingTask, + ProcessDistributeVotersHandlingTask, + }, + data_state::{DataState, ProcessLeafStreamTask, ProcessNodeIdentityStreamTask}, + server_message::ServerMessage, +}; +use async_std::{sync::RwLock, task::JoinHandle}; +use espresso_types::{PubKey, SeqTypes}; +use futures::{ + channel::mpsc::{self, Receiver, SendError, Sender}, + Sink, SinkExt, Stream, StreamExt, +}; +use hotshot_query_service::Leaf; +use hotshot_types::event::{Event, EventType}; +use serde::{Deserialize, Serialize}; +use url::Url; + +pub struct NodeValidatorAPI { + pub process_internal_client_message_handle: Option, + pub process_distribute_block_detail_handle: Option, + pub process_distribute_node_identity_handle: Option, + pub process_distribute_voters_handle: Option, + pub process_leaf_stream_handle: Option, + pub process_node_identity_stream_handle: Option, + pub process_url_stream_handle: Option, + pub url_sender: K, +} + +pub struct NodeValidatorConfig { + pub stake_table_url_base: Url, + pub initial_node_public_base_urls: Vec, +} + +#[derive(Debug)] +pub enum CreateNodeValidatorProcessingError { + FailedToGetStakeTable(hotshot_query_service::Error), +} + +/// An external message that can be sent to or received from a node +#[derive(Serialize, Deserialize, Clone)] +pub enum ExternalMessage { + /// A request for a node to respond with its identifier + /// Contains the public key of the node that is requesting the roll call + RollCallRequest(PubKey), + + /// A response to a roll call request + /// Contains the identifier of the node + RollCallResponse(RollCallInfo), +} + +/// Information about a node that is used in a roll call response +#[derive(Serialize, Deserialize, Clone)] +pub struct RollCallInfo { + // The public API URL of the node + pub 
public_api_url: Url,
+}
+
+/// [HotShotEventProcessingTask] is a task that is capable of processing events
+/// that are coming in from a HotShot event stream. This task will keep an
+/// eye out for ExternalMessageReceived events that can be decoded as a
+/// RollCallResponse. When a RollCallResponse is received, the public API URL
+/// of the node that sent the message will be sent to the provided url_sender.
+///
+/// Additionally, this can receive Decide events and send the discovered leaves
+/// to the provided leaf_sender. This can be used as a means of receiving
+/// leaves that doesn't involve hitting an external service like the task
+/// [ProcessProduceLeafStreamTask] does.
+pub struct HotShotEventProcessingTask {
+    pub task_handle: Option<JoinHandle<()>>,
+}
+
+impl HotShotEventProcessingTask {
+    /// [new] creates a new [HotShotEventProcessingTask] that will process
+    /// events from the provided event_stream.
+    ///
+    /// Calls to [new] will spawn a new task that will start processing
+    /// immediately. The task handle will be stored in the returned structure.
+    pub fn new<S, K1, K2>(event_stream: S, url_sender: K1, leaf_sender: K2) -> Self
+    where
+        S: Stream<Item = Event<SeqTypes>> + Send + Unpin + 'static,
+        K1: Sink<Url, Error = SendError> + Send + Unpin + 'static,
+        K2: Sink<Leaf<SeqTypes>, Error = SendError> + Send + Unpin + 'static,
+    {
+        let task_handle = async_std::task::spawn(Self::process_messages(
+            event_stream,
+            url_sender,
+            leaf_sender,
+        ));
+
+        Self {
+            task_handle: Some(task_handle),
+        }
+    }
+
+    /// [process_messages] is a function that will process messages from the
+    /// provided event stream.
+    async fn process_messages<S, K1, K2>(event_receiver: S, url_sender: K1, leaf_sender: K2)
+    where
+        S: Stream<Item = Event<SeqTypes>> + Send + Unpin + 'static,
+        K1: Sink<Url, Error = SendError> + Unpin,
+        K2: Sink<Leaf<SeqTypes>, Error = SendError> + Unpin,
+    {
+        let mut event_stream = event_receiver;
+        let mut url_sender = url_sender;
+        let mut leaf_sender = leaf_sender;
+        loop {
+            let event_result = event_stream.next().await;
+            let event = match event_result {
+                Some(event) => event,
+                None => {
+                    tracing::info!("event stream closed");
+                    break;
+                }
+            };
+
+            let Event { event, .. } = event;
+
+            match event {
+                EventType::Decide { leaf_chain, .. } => {
+                    for leaf_info in leaf_chain.iter().rev() {
+                        let leaf = leaf_info.leaf.clone();
+
+                        let send_result = leaf_sender.send(leaf).await;
+                        if let Err(err) = send_result {
+                            tracing::error!("leaf sender closed: {}", err);
+                            panic!("HotShotEventProcessingTask leaf sender is closed, unrecoverable, the block state will stagnate.");
+                        }
+                    }
+                }
+
+                EventType::ExternalMessageReceived(external_message_bytes) => {
+                    let roll_call_info = match bincode::deserialize(&external_message_bytes) {
+                        Ok(ExternalMessage::RollCallResponse(roll_call_info)) => roll_call_info,
+
+                        Err(err) => {
+                            tracing::info!(
+                                "failed to deserialize external message, unrecognized: {}",
+                                err
+                            );
+                            continue;
+                        }
+
+                        _ => {
+                            // Ignore any other potentially recognized messages
+                            continue;
+                        }
+                    };
+
+                    let public_api_url = roll_call_info.public_api_url;
+
+                    // Send the discovered public URL to the sink
+                    let send_result = url_sender.send(public_api_url).await;
+                    if let Err(err) = send_result {
+                        tracing::error!("url sender closed: {}", err);
+                        panic!("HotShotEventProcessingTask url sender is closed, unrecoverable, the node state will stagnate.");
+                    }
+                }
+                _ => {
+                    // Ignore all other events
+                    continue;
+                }
+            }
+        }
+    }
+}
+
+/// [Drop] implementation for [HotShotEventProcessingTask] that will cancel the
+/// task when the structure is dropped.
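+///
+/// Note that the cancellation is awaited with `block_on`, so dropping the
+/// task briefly blocks the current thread until the spawned future has been
+/// cancelled.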
+impl Drop for HotShotEventProcessingTask { + fn drop(&mut self) { + if let Some(task_handle) = self.task_handle.take() { + async_std::task::block_on(task_handle.cancel()); + } + } +} + +/// [ProcessExternalMessageHandlingTask] is a task that is capable of processing +/// external messages that are coming in from an external message stream. This +/// task will keep an eye out for ExternalMessageReceived events that can be +/// decoded as a RollCallResponse. When a RollCallResponse is received, the +/// public API URL of the node that sent the message will be sent to the +/// provided url_sender. +/// +/// This task can be used as a means of processing [ExternalMessage]s that are +/// not being provided by a HotShot event stream. It can be used as an +/// alternative to the [HotShotEventProcessingTask] for processing external +/// messages. +pub struct ProcessExternalMessageHandlingTask { + pub task_handle: Option>, +} + +impl ProcessExternalMessageHandlingTask { + /// [new] creates a new [ProcessExternalMessageHandlingTask] that will + /// process external messages from the provided external_message_receiver. + /// + /// Calls to [new] will spawn a new task that will start processing + /// immediately. The task handle will be stored in the returned structure. + pub fn new(external_message_receiver: S, url_sender: K) -> Self + where + S: Stream + Send + Unpin + 'static, + K: Sink + Send + Unpin + 'static, + { + let task_handle = async_std::task::spawn(Self::process_external_messages( + external_message_receiver, + url_sender, + )); + + Self { + task_handle: Some(task_handle), + } + } + + /// [process_external_messages] is a function that will process messages from + /// the provided external message stream. + async fn process_external_messages(external_message_receiver: S, url_sender: K) + where + S: Stream + Send + Unpin + 'static, + K: Sink + Send + Unpin + 'static, + { + let mut external_message_receiver = external_message_receiver; + let mut url_sender = url_sender; + + loop { + let external_message_result = external_message_receiver.next().await; + let external_message = match external_message_result { + Some(external_message) => external_message, + None => { + tracing::error!("external message receiver closed"); + break; + } + }; + + match external_message { + ExternalMessage::RollCallResponse(roll_call_info) => { + let public_api_url = roll_call_info.public_api_url; + + let send_result = url_sender.send(public_api_url).await; + if let Err(err) = send_result { + tracing::error!("url sender closed: {}", err); + break; + } + } + + _ => { + // Ignore all other messages + continue; + } + } + } + } +} + +/// [Drop] implementation for [ProcessExternalMessageHandlingTask] that will +/// cancel the task when the structure is dropped. +impl Drop for ProcessExternalMessageHandlingTask { + fn drop(&mut self) { + if let Some(task_handle) = self.task_handle.take() { + async_std::task::block_on(task_handle.cancel()); + } + } +} + +/** + * create_node_validator_processing is a function that creates a node validator + * processing environment. This function will create a number of tasks that + * will be responsible for processing the data streams that are coming in from + * the various sources. This function will also create the data state that + * will be used to store the state of the network. 
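+ *
+ * Note that all of the internal channels created here are bounded (a buffer
+ * of 32 entries each), so a slow consumer applies backpressure to its
+ * producer rather than growing an unbounded queue.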
+ */ +pub async fn create_node_validator_processing( + config: NodeValidatorConfig, + internal_client_message_receiver: Receiver>>, + leaf_receiver: Receiver>, +) -> Result>, CreateNodeValidatorProcessingError> { + let client_thread_state = ClientThreadState::>::new( + Default::default(), + Default::default(), + Default::default(), + Default::default(), + ClientId::from_count(1), + ); + + let client_stake_table = surf_disco::Client::new(config.stake_table_url_base.clone()); + + let stake_table = get_stake_table_from_sequencer(client_stake_table) + .await + .map_err(CreateNodeValidatorProcessingError::FailedToGetStakeTable)?; + + let data_state = DataState::new( + Default::default(), + Default::default(), + stake_table, + Default::default(), + ); + + let data_state = Arc::new(RwLock::new(data_state)); + let client_thread_state = Arc::new(RwLock::new(client_thread_state)); + let (block_detail_sender, block_detail_receiver) = mpsc::channel(32); + let (node_identity_sender_1, node_identity_receiver_1) = mpsc::channel(32); + let (node_identity_sender_2, node_identity_receiver_2) = mpsc::channel(32); + let (voters_sender, voters_receiver) = mpsc::channel(32); + let (mut url_sender, url_receiver) = mpsc::channel(32); + + let process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + internal_client_message_receiver, + data_state.clone(), + client_thread_state.clone(), + ); + + let process_distribute_block_detail_handle = ProcessDistributeBlockDetailHandlingTask::new( + client_thread_state.clone(), + block_detail_receiver, + ); + + let process_distribute_node_identity_handle = ProcessDistributeNodeIdentityHandlingTask::new( + client_thread_state.clone(), + node_identity_receiver_2, + ); + + let process_distribute_voters_handle = + ProcessDistributeVotersHandlingTask::new(client_thread_state.clone(), voters_receiver); + + let process_leaf_stream_handle = ProcessLeafStreamTask::new( + leaf_receiver, + data_state.clone(), + block_detail_sender, + voters_sender, + ); + + let process_node_identity_stream_handle = ProcessNodeIdentityStreamTask::new( + node_identity_receiver_1, + data_state.clone(), + node_identity_sender_2, + ); + + let process_url_stream_handle = + ProcessNodeIdentityUrlStreamTask::new(url_receiver, node_identity_sender_1); + + // Send any initial URLS to the url sender for immediate processing. 
+ // These urls are supplied by the configuration of this function + { + let urls = config.initial_node_public_base_urls; + + for url in urls { + let send_result = url_sender.send(url).await; + if let Err(err) = send_result { + tracing::info!("url sender closed: {}", err); + break; + } + } + } + + Ok(NodeValidatorAPI { + process_internal_client_message_handle: Some(process_internal_client_message_handle), + process_distribute_block_detail_handle: Some(process_distribute_block_detail_handle), + process_distribute_node_identity_handle: Some(process_distribute_node_identity_handle), + process_distribute_voters_handle: Some(process_distribute_voters_handle), + process_leaf_stream_handle: Some(process_leaf_stream_handle), + process_node_identity_stream_handle: Some(process_node_identity_stream_handle), + process_url_stream_handle: Some(process_url_stream_handle), + url_sender: url_sender.clone(), + }) +} + +#[cfg(test)] +mod test { + use crate::{ + api::node_validator::v0::{ + HotshotQueryServiceLeafStreamRetriever, ProcessProduceLeafStreamTask, + StateClientMessageSender, STATIC_VER_0_1, + }, + service::{client_message::InternalClientMessage, server_message::ServerMessage}, + }; + use futures::channel::mpsc::{self, Sender}; + use tide_disco::App; + + struct TestState(Sender>>); + + impl StateClientMessageSender> for TestState { + fn sender(&self) -> Sender>> { + self.0.clone() + } + } + + #[async_std::test] + #[ignore] + async fn test_full_setup_example() { + let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(32); + let state = TestState(internal_client_message_sender); + + let mut app: App<_, crate::api::node_validator::v0::Error> = App::with_state(state); + let node_validator_api_result = super::super::define_api::(); + let node_validator_api = match node_validator_api_result { + Ok(node_validator_api) => node_validator_api, + Err(err) => { + panic!("error defining node validator api: {:?}", err); + } + }; + + match app.register_module("node-validator", node_validator_api) { + Ok(_) => {} + Err(err) => { + panic!("error registering node validator api: {:?}", err); + } + } + + let (leaf_sender, leaf_receiver) = mpsc::channel(10); + + let process_consume_leaves = ProcessProduceLeafStreamTask::new( + HotshotQueryServiceLeafStreamRetriever::new( + "https://query.cappuccino.testnet.espresso.network/v0" + .parse() + .unwrap(), + ), + leaf_sender, + ); + + let node_validator_task_state = match super::create_node_validator_processing( + super::NodeValidatorConfig { + stake_table_url_base: "https://query.cappuccino.testnet.espresso.network/v0" + .parse() + .unwrap(), + initial_node_public_base_urls: vec![ + "https://query-1.cappuccino.testnet.espresso.network/" + .parse() + .unwrap(), + "https://query-2.cappuccino.testnet.espresso.network/" + .parse() + .unwrap(), + "https://query-3.cappuccino.testnet.espresso.network/" + .parse() + .unwrap(), + "https://query-4.cappuccino.testnet.espresso.network/" + .parse() + .unwrap(), + ], + }, + internal_client_message_receiver, + leaf_receiver, + ) + .await + { + Ok(node_validator_task_state) => node_validator_task_state, + + Err(err) => { + panic!("error defining node validator api: {:?}", err); + } + }; + + // We would like to wait until being signaled + let app_serve_handle = async_std::task::spawn(async move { + let app_serve_result = app.serve("0.0.0.0:9000", STATIC_VER_0_1).await; + tracing::info!("app serve result: {:?}", app_serve_result); + }); + tracing::info!("now listening on port 9000"); + + 
app_serve_handle.await; + + drop(node_validator_task_state); + drop(process_consume_leaves); + } +} diff --git a/node-metrics/src/api/node_validator/v0/example_prometheus_metrics_output.txt b/node-metrics/src/api/node_validator/v0/example_prometheus_metrics_output.txt new file mode 100644 index 000000000..f49e38610 --- /dev/null +++ b/node-metrics/src/api/node_validator/v0/example_prometheus_metrics_output.txt @@ -0,0 +1,89 @@ +# HELP consensus_cdn_num_failed_messages num_failed_messages +# TYPE consensus_cdn_num_failed_messages counter +consensus_cdn_num_failed_messages 0 +# HELP consensus_current_view current_view +# TYPE consensus_current_view gauge +consensus_current_view 7 +# HELP consensus_invalid_qc invalid_qc +# TYPE consensus_invalid_qc gauge +consensus_invalid_qc 0 +# HELP consensus_last_decided_time last_decided_time +# TYPE consensus_last_decided_time gauge +consensus_last_decided_time 1720537017 +# HELP consensus_last_decided_view last_decided_view +# TYPE consensus_last_decided_view gauge +consensus_last_decided_view 4 +# HELP consensus_last_synced_block_height last_synced_block_height +# TYPE consensus_last_synced_block_height gauge +consensus_last_synced_block_height 4 +# HELP consensus_libp2p_num_connected_peers num_connected_peers +# TYPE consensus_libp2p_num_connected_peers gauge +consensus_libp2p_num_connected_peers 4 +# HELP consensus_libp2p_num_failed_messages num_failed_messages +# TYPE consensus_libp2p_num_failed_messages counter +consensus_libp2p_num_failed_messages 0 +# HELP consensus_node node +# TYPE consensus_node gauge +consensus_node{key="BLS_VER_KEY~bQszS-QKYvUij2g20VqS8asttGSb95NrTu2PUj0uMh1CBUxNy1FqyPDjZqB29M7ZbjWqj79QkEOWkpga84AmDYUeTuWmy-0P1AdKHD3ehc-dKvei78BDj5USwXPJiDUlCxvYs_9rWYhagaq-5_LXENr78xel17spftNd5MA1Mw5U"} 1 +# HELP consensus_node_identity_general node_identity_general +# TYPE consensus_node_identity_general gauge +consensus_node_identity_general{company_name="Espresso Systems",company_website="https://www.espressosys.com/",name="sequencer0",network_type="local",node_type="espresso-sequencer 0.1",operating_system="Linux 5.15.153.1",wallet="0x0000000000000000000000000000000000000000"} 1 +# HELP consensus_node_identity_location node_identity_location +# TYPE consensus_node_identity_location gauge +consensus_node_identity_location{country="US",latitude="-40.7128",longitude="-74.0060"} 1 +# HELP consensus_node_index node_index +# TYPE consensus_node_index gauge +consensus_node_index 4 +# HELP consensus_number_of_empty_blocks_proposed number_of_empty_blocks_proposed +# TYPE consensus_number_of_empty_blocks_proposed counter +consensus_number_of_empty_blocks_proposed 1 +# HELP consensus_number_of_timeouts number_of_timeouts +# TYPE consensus_number_of_timeouts counter +consensus_number_of_timeouts 0 +# HELP consensus_number_of_timeouts_as_leader number_of_timeouts_as_leader +# TYPE consensus_number_of_timeouts_as_leader counter +consensus_number_of_timeouts_as_leader 0 +# HELP consensus_number_of_views_per_decide_event number_of_views_per_decide_event +# TYPE consensus_number_of_views_per_decide_event histogram +consensus_number_of_views_per_decide_event_bucket{le="0.005"} 0 +consensus_number_of_views_per_decide_event_bucket{le="0.01"} 0 +consensus_number_of_views_per_decide_event_bucket{le="0.025"} 0 +consensus_number_of_views_per_decide_event_bucket{le="0.05"} 0 +consensus_number_of_views_per_decide_event_bucket{le="0.1"} 0 +consensus_number_of_views_per_decide_event_bucket{le="0.25"} 0 +consensus_number_of_views_per_decide_event_bucket{le="0.5"} 0 
+consensus_number_of_views_per_decide_event_bucket{le="1"} 0 +consensus_number_of_views_per_decide_event_bucket{le="2.5"} 0 +consensus_number_of_views_per_decide_event_bucket{le="5"} 4 +consensus_number_of_views_per_decide_event_bucket{le="10"} 4 +consensus_number_of_views_per_decide_event_bucket{le="+Inf"} 4 +consensus_number_of_views_per_decide_event_sum 12 +consensus_number_of_views_per_decide_event_count 4 +# HELP consensus_number_of_views_since_last_decide number_of_views_since_last_decide +# TYPE consensus_number_of_views_since_last_decide gauge +consensus_number_of_views_since_last_decide 4 +# HELP consensus_outstanding_transactions outstanding_transactions +# TYPE consensus_outstanding_transactions gauge +consensus_outstanding_transactions 0 +# HELP consensus_outstanding_transactions_memory_size outstanding_transactions_memory_size +# TYPE consensus_outstanding_transactions_memory_size gauge +consensus_outstanding_transactions_memory_size 0 +# HELP consensus_version version +# TYPE consensus_version gauge +consensus_version{desc="20240701-15-gbd0957fd-dirty",rev="bd0957fddad19caab010dc59e5a92bc1c95cbc07",timestamp="1980-01-01T00:00:00.000000000Z"} 1 +# HELP consensus_view_duration_as_leader view_duration_as_leader +# TYPE consensus_view_duration_as_leader histogram +consensus_view_duration_as_leader_bucket{le="0.005"} 0 +consensus_view_duration_as_leader_bucket{le="0.01"} 0 +consensus_view_duration_as_leader_bucket{le="0.025"} 0 +consensus_view_duration_as_leader_bucket{le="0.05"} 0 +consensus_view_duration_as_leader_bucket{le="0.1"} 0 +consensus_view_duration_as_leader_bucket{le="0.25"} 0 +consensus_view_duration_as_leader_bucket{le="0.5"} 0 +consensus_view_duration_as_leader_bucket{le="1"} 0 +consensus_view_duration_as_leader_bucket{le="2.5"} 1 +consensus_view_duration_as_leader_bucket{le="5"} 1 +consensus_view_duration_as_leader_bucket{le="10"} 1 +consensus_view_duration_as_leader_bucket{le="+Inf"} 1 +consensus_view_duration_as_leader_sum 2 +consensus_view_duration_as_leader_count 1 diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs new file mode 100644 index 000000000..0cae3c51c --- /dev/null +++ b/node-metrics/src/api/node_validator/v0/mod.rs @@ -0,0 +1,1028 @@ +pub mod cdn; +pub mod create_node_validator_api; + +use crate::service::client_message::{ClientMessage, InternalClientMessage}; +use crate::service::data_state::{LocationDetails, NodeIdentity}; +use crate::service::server_message::ServerMessage; +use async_std::task::JoinHandle; +use espresso_types::SeqTypes; +use futures::channel::mpsc::SendError; +use futures::future::Either; +use futures::{ + channel::mpsc::{self, Sender}, + FutureExt, Sink, SinkExt, Stream, StreamExt, +}; +use hotshot_query_service::Leaf; +use hotshot_stake_table::vec_based::StakeTable; +use hotshot_types::light_client::{CircuitField, StateVerKey}; +use hotshot_types::signature_key::BLSPubKey; +use hotshot_types::traits::{signature_key::StakeTableEntryType, stake_table::StakeTableScheme}; +use hotshot_types::PeerConfig; +use prometheus_parse::{Sample, Scrape}; +use serde::{Deserialize, Serialize}; +use std::fmt; +use std::future::Future; +use std::io::BufRead; +use std::pin::Pin; +use std::str::FromStr; +use tide_disco::socket::Connection; +use tide_disco::{api::ApiError, Api}; +use url::Url; +use vbs::version::{StaticVersion, StaticVersionType, Version}; + +/// CONSTANT for protocol major version +pub const VERSION_MAJ: u16 = 0; + +/// CONSTANT for protocol minor version +pub const 
VERSION_MIN: u16 = 1; + +pub const VERSION_0_1: Version = Version { + major: VERSION_MAJ, + minor: VERSION_MIN, +}; + +/// Constant for the version of this API. +pub const BASE_VERSION: Version = VERSION_0_1; + +/// Specific type for version 0.1 +pub type Version01 = StaticVersion<VERSION_MAJ, VERSION_MIN>; + +// Static instance of the Version01 type +pub const STATIC_VER_0_1: Version01 = StaticVersion {}; + +#[derive(Debug, Serialize, Deserialize)] +pub enum Error { + UnhandledTideDisco(tide_disco::StatusCode, String), + UnhandledSurfDisco(surf_disco::StatusCode, String), +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::UnhandledSurfDisco(status, msg) => { + write!(f, "Unhandled Surf Disco Error: {} - {}", status, msg) + } + + Self::UnhandledTideDisco(status, msg) => { + write!(f, "Unhandled Tide Disco Error: {} - {}", status, msg) + } + } + } +} + +impl std::error::Error for Error {} + +impl tide_disco::Error for Error { + fn catch_all(status: tide_disco::StatusCode, msg: String) -> Self { + Self::UnhandledTideDisco(status, msg) + } + + fn status(&self) -> tide_disco::StatusCode { + tide_disco::StatusCode::INTERNAL_SERVER_ERROR + } +} + +#[derive(Debug)] +pub enum LoadApiError { + Toml(toml::de::Error), + Api(ApiError), +} + +impl From<toml::de::Error> for LoadApiError { + fn from(err: toml::de::Error) -> Self { + LoadApiError::Toml(err) + } +} + +impl From<ApiError> for LoadApiError { + fn from(err: ApiError) -> Self { + LoadApiError::Api(err) + } +} + +pub(crate) fn load_api<State: 'static, Error: 'static, Ver: StaticVersionType + 'static>( + default: &str, +) -> Result<Api<State, Error, Ver>, LoadApiError> { + let toml: toml::Value = toml::from_str(default)?; + Ok(Api::new(toml)?) +} + +#[derive(Debug)] +pub enum LoadTomlError { + Io(std::io::Error), + Toml(toml::de::Error), + Utf8(std::str::Utf8Error), +} + +impl From<std::io::Error> for LoadTomlError { + fn from(err: std::io::Error) -> Self { + LoadTomlError::Io(err) + } +} + +impl From<toml::de::Error> for LoadTomlError { + fn from(err: toml::de::Error) -> Self { + LoadTomlError::Toml(err) + } +} + +impl From<std::str::Utf8Error> for LoadTomlError { + fn from(err: std::str::Utf8Error) -> Self { + LoadTomlError::Utf8(err) + } +} + +#[derive(Debug)] +pub enum DefineApiError { + LoadApiError(LoadApiError), + LoadTomlError(LoadTomlError), + ApiError(ApiError), +} + +impl From<LoadApiError> for DefineApiError { + fn from(err: LoadApiError) -> Self { + DefineApiError::LoadApiError(err) + } +} + +impl From<LoadTomlError> for DefineApiError { + fn from(err: LoadTomlError) -> Self { + DefineApiError::LoadTomlError(err) + } +} + +impl From<ApiError> for DefineApiError { + fn from(err: ApiError) -> Self { + DefineApiError::ApiError(err) + } +}
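These error enums nest so that `?` can convert at each stage: `toml::de::Error` and `ApiError` lift into `LoadApiError`, which in turn lifts into `DefineApiError`. A minimal sketch of that flow, not part of this diff; the inline TOML literal is a placeholder and would need a real `[meta]` section and route tables before `Api::new` would accept it at runtime:

```rust
fn define_placeholder_api<State: 'static>() -> Result<(), DefineApiError> {
    // toml::from_str and Api::new errors become LoadApiError via the From
    // impls above; `?` then lifts LoadApiError into DefineApiError.
    let _api = load_api::<State, Error, Version01>("[meta]\nNAME = \"placeholder\"")?;
    Ok(())
}
```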
+/// [StateClientMessageSender] allows for the retrieval of a [Sender] for sending +/// messages received from the client to the Server for request processing. +pub trait StateClientMessageSender<K> { + fn sender(&self) -> Sender<InternalClientMessage<K>>; +} + +#[derive(Debug)] +pub enum EndpointError {} + +pub fn define_api<State>() -> Result<Api<State, Error, Version01>, DefineApiError> +where + State: StateClientMessageSender<Sender<ServerMessage>> + Send + Sync + 'static, +{ + let mut api = load_api::<State, Error, Version01>(include_str!("./node_validator.toml"))?; + + api.with_version("0.0.1".parse().unwrap()).socket( + "details", + move |_req, socket: Connection<ServerMessage, ClientMessage, Error, Version01>, state| { + async move { + let mut socket_stream = socket.clone(); + let mut socket_sink = socket; + + let mut internal_client_message_sender = state.sender(); + let (server_message_sender, mut server_message_receiver) = mpsc::channel(32); + + // Let's register ourselves with the Server + if let Err(err) = internal_client_message_sender + .send(InternalClientMessage::Connected(server_message_sender)) + .await + { + // This means that the client_message_sender is closed, + // and we need to exit the stream. + tracing::info!( + "client message sender is closed before first message: {}", + err + ); + return Ok(()); + } + + // We should receive a response from the server that identifies us + // uniquely. + let client_id = if let Some(ServerMessage::YouAre(client_id)) = + server_message_receiver.next().await + { + client_id + } else { + // The channel is closed, and this client should be removed; + // we need to exit the stream + tracing::info!("server message receiver closed before first message"); + return Ok(()); + }; + + // We create these two futures once, outside of the loop. + // Recreating an un-polled future on every iteration could + // drop a message that was already in flight. + let mut next_client_message = socket_stream.next(); + let mut next_server_message = server_message_receiver.next(); + + loop { + match futures::future::select(next_client_message, next_server_message).await { + Either::Left((client_request, remaining_server_message)) => { + let client_request = if let Some(client_request) = client_request { + client_request + } else { + // The client has disconnected, we need to exit the stream + tracing::info!("client message has disconnected"); + break; + }; + + let client_request = if let Ok(client_request) = client_request { + client_request + } else { + // This indicates that there was a more + // specific error with the socket message. + // This error can be various, and may be + // recoverable depending on the actual nature + // of the error. We will treat it as + // unrecoverable for now. + break; + }; + + let internal_client_message = + client_request.to_internal_with_client_id(client_id); + if let Err(err) = internal_client_message_sender + .send(internal_client_message) + .await + { + // This means that the client_message_sender is closed + tracing::info!("client message sender is closed: {}", err); + break; + } + + // let's queue up the next client message to receive + next_client_message = socket_stream.next(); + next_server_message = remaining_server_message; + } + Either::Right((server_message, remaining_client_message)) => { + // Alright, we have a server message, we want to forward it + // to the down-stream client.
+ + let server_message = if let Some(server_message) = server_message { + server_message + } else { + // The server has disconnected, we need to exit the stream + break; + }; + + // We want to forward the message to the client + if let Err(err) = socket_sink.send(&server_message).await { + // This means that the socket is closed + tracing::info!("socket is closed: {}", err); + break; + } + + // let's queue up the next server message to receive + next_server_message = server_message_receiver.next(); + next_client_message = remaining_client_message; + } + } + } + + // We don't actually care if this fails or not, as we're exiting + // this function anyway, and these Senders and Receivers will + // automatically be dropped. + _ = internal_client_message_sender + .send(InternalClientMessage::Disconnected(client_id)) + .await; + + Ok(()) + } + .boxed() + }, + )?; + Ok(api) +} + +#[derive(Debug, Deserialize)] +pub struct PublishHotShotConfig { + pub known_nodes_with_stake: Vec<PeerConfig<BLSPubKey>>, +} + +#[derive(Debug, Deserialize)] +pub struct SequencerConfig { + pub config: PublishHotShotConfig, +} + +/// [get_stake_table_from_sequencer] retrieves the stake table from the +/// Sequencer. It expects a [surf_disco::Client] to be provided so that it can +/// make the request to the Hotshot Query Service. It will return a +/// [StakeTable] that is populated with the data retrieved from the Hotshot +/// Query Service. +pub async fn get_stake_table_from_sequencer( + client: surf_disco::Client<hotshot_query_service::Error, Version01>, +) -> Result<StakeTable<BLSPubKey, StateVerKey, CircuitField>, hotshot_query_service::Error> { + let request = client + .get("config/hotshot") + // We need to set the Accept header, otherwise the Content-Type + // will be application/octet-stream, and we won't be able to + // deserialize the response. + .header("Accept", "application/json"); + let stake_table_result = request.send().await; + + let sequencer_config: SequencerConfig = match stake_table_result { + Ok(sequencer_config) => sequencer_config, + Err(err) => { + tracing::info!("retrieve stake table request failed: {}", err); + return Err(err); + } + }; + + let public_hot_shot_config = sequencer_config.config; + + let mut stake_table = StakeTable::<BLSPubKey, StateVerKey, CircuitField>::new( + public_hot_shot_config.known_nodes_with_stake.len(), + ); + + for node in public_hot_shot_config.known_nodes_with_stake.into_iter() { + stake_table + .register( + *node.stake_table_entry.key(), + node.stake_table_entry.stake(), + node.state_ver_key, + ) + .expect("registering stake table entry"); + } + + stake_table.advance(); + stake_table.advance(); + + Ok(stake_table) +} + +pub enum GetNodeIdentityFromUrlError { + Url(url::ParseError), + Reqwest(reqwest::Error), + Io(std::io::Error), + NoNodeIdentity, +} + +impl std::fmt::Display for GetNodeIdentityFromUrlError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + GetNodeIdentityFromUrlError::Url(err) => write!(f, "url: {}", err), + GetNodeIdentityFromUrlError::Reqwest(err) => write!(f, "reqwest error: {}", err), + GetNodeIdentityFromUrlError::Io(err) => write!(f, "io error: {}", err), + GetNodeIdentityFromUrlError::NoNodeIdentity => write!(f, "no node identity"), + } + } +} + +impl From<url::ParseError> for GetNodeIdentityFromUrlError { + fn from(err: url::ParseError) -> Self { + GetNodeIdentityFromUrlError::Url(err) + } +} + +impl From<reqwest::Error> for GetNodeIdentityFromUrlError { + fn from(err: reqwest::Error) -> Self { + GetNodeIdentityFromUrlError::Reqwest(err) + } +} + +impl From<std::io::Error> for GetNodeIdentityFromUrlError { + fn from(err: std::io::Error) -> Self { + GetNodeIdentityFromUrlError::Io(err) + } +}
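A sketch of fetching the stake table once at startup, not part of this diff; the URL and the `fetch_once` name are placeholders:

```rust
use surf_disco::Client;
use url::Url;

async fn fetch_once() -> Result<(), hotshot_query_service::Error> {
    // Point the client at a sequencer's version root, e.g. ".../v0/".
    let url = Url::parse("https://query.example.com/v0/").expect("hypothetical URL");
    let client: Client<hotshot_query_service::Error, Version01> = Client::new(url);
    // Registers every known node with stake and advances the table.
    let _stake_table = get_stake_table_from_sequencer(client).await?;
    Ok(())
}
```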
+/// [get_node_identity_from_url] retrieves a [NodeIdentity] from a URL. It +/// expects a [url::Url] to be provided so that it can make the request to the +/// Sequencer status metrics API. It will return a [NodeIdentity] that is +/// populated with the data retrieved from the Sequencer status metrics API. +/// If no [NodeIdentity] is found, it will return a +/// [GetNodeIdentityFromUrlError::NoNodeIdentity] error. +pub async fn get_node_identity_from_url( + url: url::Url, +) -> Result<NodeIdentity, GetNodeIdentityFromUrlError> { + let client = reqwest::Client::new(); + + let completed_url = url.join("v0/status/metrics")?; + let request = client.get(completed_url).build()?; + let response = client.execute(request).await?; + let response_bytes = response.bytes().await?; + + let buffered_response = std::io::BufReader::new(&*response_bytes); + let scrape = prometheus_parse::Scrape::parse(buffered_response.lines())?; + + if let Some(node_identity) = node_identity_from_scrape(scrape) { + let mut node_identity = node_identity; + node_identity.public_url = Some(url); + Ok(node_identity) + } else { + Err(GetNodeIdentityFromUrlError::NoNodeIdentity) + } +} + +/// [LeafStreamRetriever] is a general trait that allows for the retrieval of a +/// stream of Leaves from a source. Callers don't care about the source, only +/// that it is able to produce a stream of Leaves. +/// +/// This allows us to swap the implementation of the [LeafStreamRetriever] for +/// testing purposes, or for newer sources in the future. +pub trait LeafStreamRetriever: Send { + type Item; + type ItemError: std::error::Error + Send; + type Error: std::error::Error + Send; + type Stream: Stream<Item = Result<Self::Item, Self::ItemError>> + Send + Unpin; + type Future: Future<Output = Result<Self::Stream, Self::Error>> + Send; + + /// [retrieve_stream] retrieves a stream of [Leaf]s from the source. It + /// expects the current block height to be provided so that it can determine + /// the starting block height to retrieve the stream of [Leaf]s from. + /// + /// It should check the current height of the chain so that it only needs + /// to retrieve the number of older blocks that are needed, instead of + /// starting from the beginning of time. + fn retrieve_stream(&self, current_block_height: Option<u64>) -> Self::Future; +}
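Implementations only need to hand back a stream of leaf results. A minimal test-double sketch of the trait, not part of this diff; the `MockRetriever` name and the pre-recorded `Vec` of leaves are hypothetical:

```rust
use futures::stream::{self, BoxStream};
use futures::StreamExt;

struct MockRetriever {
    leaves: Vec<Leaf<SeqTypes>>,
}

impl LeafStreamRetriever for MockRetriever {
    type Item = Leaf<SeqTypes>;
    type ItemError = hotshot_query_service::Error;
    type Error = hotshot_query_service::Error;
    type Stream = BoxStream<'static, Result<Self::Item, Self::ItemError>>;
    type Future = futures::future::Ready<Result<Self::Stream, Self::Error>>;

    fn retrieve_stream(&self, _current_block_height: Option<u64>) -> Self::Future {
        // Replay the recorded leaves as if they were streamed live.
        let stream: Self::Stream =
            stream::iter(self.leaves.clone().into_iter().map(Ok)).boxed();
        futures::future::ready(Ok(stream))
    }
}
```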
+ +/// [HotshotQueryServiceLeafStreamRetriever] is a [LeafStreamRetriever] that +/// retrieves a stream of [Leaf]s from the Hotshot Query Service. It expects +/// the base URL of the Hotshot Query Service to be provided so that it can +/// make the request to the Hotshot Query Service. +pub struct HotshotQueryServiceLeafStreamRetriever { + base_url: Url, +} + +impl HotshotQueryServiceLeafStreamRetriever { + /// [new] creates a new [HotshotQueryServiceLeafStreamRetriever] that + /// will use the given base [Url] to be able to retrieve the stream of + /// [Leaf]s from the Hotshot Query Service. + /// + /// The [Url] is expected to point to the API version root of the + /// Hotshot Query Service. Example: + /// https://example.com/v0 + pub fn new(base_url: Url) -> Self { + Self { base_url } + } +} + +impl LeafStreamRetriever for HotshotQueryServiceLeafStreamRetriever { + type Item = Leaf<SeqTypes>; + type ItemError = hotshot_query_service::Error; + type Error = hotshot_query_service::Error; + type Stream = surf_disco::socket::Connection< + Leaf<SeqTypes>, + surf_disco::socket::Unsupported, + Self::ItemError, + Version01, + >; + type Future = Pin<Box<dyn Future<Output = Result<Self::Stream, Self::Error>> + Send>>; + + fn retrieve_stream(&self, current_block_height: Option<u64>) -> Self::Future { + let client = surf_disco::Client::new(self.base_url.clone()); + async move { + let block_height_result = client.get("status/block-height").send().await; + let block_height: u64 = match block_height_result { + Ok(block_height) => block_height, + Err(err) => { + tracing::info!("retrieve block height request failed: {}", err); + return Err(err); + } + }; + + let latest_block_start = block_height.saturating_sub(50); + let start_block_height = if let Some(known_height) = current_block_height { + std::cmp::min(known_height, latest_block_start) + } else { + latest_block_start + }; + + let leaves_stream_result = client + .socket(&format!( + "availability/stream/leaves/{}", + start_block_height + )) + .subscribe::<Leaf<SeqTypes>>() + .await; + + let leaves_stream = match leaves_stream_result { + Ok(leaves_stream) => leaves_stream, + Err(err) => { + tracing::info!("retrieve leaves stream failed: {}", err); + return Err(err); + } + }; + + Ok(leaves_stream) + } + .boxed() + } +}
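A sketch of driving this retriever directly, outside of the task type that follows; the URL and the `tail_leaves` name are placeholders:

```rust
use futures::StreamExt;

async fn tail_leaves() -> Result<(), hotshot_query_service::Error> {
    let retriever = HotshotQueryServiceLeafStreamRetriever::new(
        url::Url::parse("https://example.com/v0").expect("hypothetical URL"),
    );
    // None means "no known height": start roughly 50 blocks behind the tip.
    let mut leaves = retriever.retrieve_stream(None).await?;
    while let Some(result) = leaves.next().await {
        let leaf = result?;
        tracing::info!("streamed leaf: {:?}", leaf);
    }
    Ok(())
}
```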
+ +/// [ProcessProduceLeafStreamTask] is a task that produces a stream of [Leaf]s +/// from the Hotshot Query Service. It will attempt to retrieve the [Leaf]s +/// from the Hotshot Query Service and then send them to the [Sink] provided. +pub struct ProcessProduceLeafStreamTask { + pub task_handle: Option<JoinHandle<()>>, +} + +impl ProcessProduceLeafStreamTask { + /// [new] creates a new [ProcessProduceLeafStreamTask] that produces a + /// stream of [Leaf]s from the Hotshot Query Service. + /// + /// Calling this function will create an async task that will start + /// processing immediately. The task's handle will be stored in the + /// returned state. + pub fn new<R, K>(leaf_stream_retriever: R, leaf_sender: K) -> Self + where + R: LeafStreamRetriever<Item = Leaf<SeqTypes>> + 'static, + K: Sink<Leaf<SeqTypes>, Error = SendError> + Clone + Send + Sync + Unpin + 'static, + { + let task_handle = async_std::task::spawn(Self::process_consume_leaf_stream( + leaf_stream_retriever, + leaf_sender, + )); + + Self { + task_handle: Some(task_handle), + } + } + + /// [process_consume_leaf_stream] produces a stream of [Leaf]s from the + /// Hotshot Query Service. It will attempt to retrieve the [Leaf]s from the + /// Hotshot Query Service and then send them to the [Sink] provided. If the + /// [Sink] is closed, or if the Stream ends prematurely, then the function + /// will return. + async fn process_consume_leaf_stream<R, K>(leaf_stream_retriever: R, leaf_sender: K) + where + R: LeafStreamRetriever<Item = Leaf<SeqTypes>>, + K: Sink<Leaf<SeqTypes>, Error = SendError> + Clone + Send + Sync + Unpin + 'static, + { + // Alright, let's start processing leaves + // TODO: implement retry logic with backoff and ultimately fail if + // unable to retrieve the stream within a time frame. + let leaves_stream_result = leaf_stream_retriever.retrieve_stream(None).await; + let leaves_stream = match leaves_stream_result { + Ok(leaves_stream) => leaves_stream, + Err(err) => { + tracing::info!("retrieve leaves stream failed: {}", err); + return; + } + }; + + let mut leaf_sender = leaf_sender; + let mut leaves_stream = leaves_stream; + + loop { + let leaf_result = leaves_stream.next().await; + let leaf = if let Some(Ok(leaf)) = leaf_result { + leaf + } else { + tracing::info!("leaf stream closed"); + break; + }; + + let leaf_send_result = leaf_sender.send(leaf).await; + if let Err(err) = leaf_send_result { + tracing::info!("leaf sender closed: {}", err); + break; + } + } + } +} + +/// [Drop] implementation for [ProcessProduceLeafStreamTask] that will cancel +/// the task if it hasn't already been completed. +impl Drop for ProcessProduceLeafStreamTask { + fn drop(&mut self) { + if let Some(task_handle) = self.task_handle.take() { + async_std::task::block_on(task_handle.cancel()); + } + } +} + +/// [populate_node_identity_general_from_scrape] populates the general +/// information of a [NodeIdentity] from a [Sample] that is expected to be +/// the "consensus_node_identity_general" sample. +fn populate_node_identity_general_from_scrape( + node_identity: &mut NodeIdentity, + node_identity_general_sample: &Sample, +) { + node_identity.name = node_identity_general_sample + .labels + .get("name") + .map(|s| s.into()); + node_identity.company = node_identity_general_sample + .labels + .get("company_name") + .map(|s| s.into()); + let company_website = match node_identity_general_sample + .labels + .get("company_website") + .map(Url::parse) + { + Some(Ok(url)) => Some(url), + _ => None, + }; + node_identity.company_website = company_website; + node_identity.network_type = node_identity_general_sample + .labels + .get("network_type") + .map(|s| s.into()); + node_identity.node_type = node_identity_general_sample + .labels + .get("node_type") + .map(|s| s.into()); + node_identity.operating_system = node_identity_general_sample + .labels + .get("operating_system") + .map(|s| s.into()); +} + +/// [populate_node_location_from_scrape] populates the location information of a +/// [NodeIdentity] from a [Sample] that is expected to be the +/// "consensus_node_identity_location" sample. +fn populate_node_location_from_scrape( + node_identity: &mut NodeIdentity, + node_identity_location_sample: &Sample, +) { + let mut location = node_identity + .location + .take() + .unwrap_or(LocationDetails::new(None, None)); + location.country = node_identity_location_sample + .labels + .get("country") + .map(|s| s.into()); + + let latitude = node_identity_location_sample + .labels + .get("latitude") + .map(|s| s.parse::<f64>()); + let longitude = node_identity_location_sample + .labels + .get("longitude") + .map(|s| s.parse::<f64>()); + + if let (Some(Ok(latitude)), Some(Ok(longitude))) = (latitude, longitude) { + location.coords = Some((latitude, longitude)); + } + + // Are there any details populated? + if location.country.is_some() || location.coords.is_some() { + node_identity.location = Some(location); + } else { + node_identity.location = None; + } +} + +/// [populate_node_identity_from_scrape] populates a [NodeIdentity] from a +/// [Scrape] that is expected to contain the necessary information to populate +/// the [NodeIdentity].
+pub fn populate_node_identity_from_scrape(node_identity: &mut NodeIdentity, scrape: Scrape) { + // Handle General Information Population + + // Let's verify that the scrape information contains and matches our node + // identity's public key. + { + let node_key = scrape + .docs + .iter() + .find(|(_, key)| key == &"node") + .map(|(key, _)| key); + + let node_key = if let Some(node_key) = node_key { + node_key + } else { + // We were unable to find the key for the public key on the metrics + // scrape result. + tracing::warn!("scrape result doesn't seem to contain 'node' key, preventing us from verifying the public key"); + return; + }; + + let node_sample = scrape + .samples + .iter() + .find(|sample| &sample.metric == node_key); + + let node_sample = if let Some(node_sample) = node_sample { + node_sample + } else { + // We were unable to find the sample for the public key on the metrics + // scrape result. + tracing::warn!("scrape result doesn't seem to contain 'node' sample, preventing us from verifying the public key. This is especially odd considering that we found the 'node' key already."); + return; + }; + + let public_key_string = node_sample.labels.get("key"); + + let public_key_from_scrape: BLSPubKey = if let Some(public_key_string) = public_key_string { + match BLSPubKey::from_str(public_key_string) { + Ok(public_key) => public_key, + Err(err) => { + // We couldn't parse the public key, so we can't create a NodeIdentity. + tracing::info!("parsing public key failed: {}", err); + return; + } + } + } else { + // We were unable to find the public key in the scrape result. + tracing::warn!("scrape result doesn't seem to contain 'key' label in the 'node' sample, preventing us from verifying the public key. This is especially odd considering that we found the 'node' key and sample already."); + return; + }; + + let public_key_from_scrape_string = public_key_from_scrape.to_string(); + let node_identity_public_key_string = node_identity.public_key().to_string(); + + if public_key_from_scrape_string != node_identity_public_key_string { + tracing::warn!("node identity public key doesn't match public key in scrape, are we hitting the wrong URL, or is it behind a load balancer between multiple nodes?"); + return; + } + + debug_assert_eq!(&public_key_from_scrape, node_identity.public_key()); + } + + // Determine the key for the "consensus_node_identity_general" sample + // so we can populate the general information concerning node identity. + let node_identity_general_key = scrape + .docs + .iter() + .find(|(_, key)| key == &"node_identity_general") + .map(|(key, _)| key); + + if let Some(node_identity_general_key) = node_identity_general_key { + let node_identity_general_sample = scrape + .samples + .iter() + .find(|sample| &sample.metric == node_identity_general_key); + + if let Some(node_identity_general_sample) = node_identity_general_sample { + populate_node_identity_general_from_scrape(node_identity, node_identity_general_sample); + } + } + + // Lookup node identity location information, so we can populate it. 
+ let node_identity_location_key = scrape + .docs + .iter() + .find(|(_, key)| key == &"node_identity_location") + .map(|(key, _)| key); + if let Some(node_identity_location_key) = node_identity_location_key { + let node_identity_location_sample = scrape + .samples + .iter() + .find(|sample| &sample.metric == node_identity_location_key); + + if let Some(node_identity_location_sample) = node_identity_location_sample { + populate_node_location_from_scrape(node_identity, node_identity_location_sample); + } + } +} + +/// [node_identity_from_scrape] creates a [NodeIdentity] from a [Scrape]. It +/// expects the [Scrape] to contain the necessary information to populate the +/// [NodeIdentity]. If the [Scrape] doesn't contain the necessary information +/// to populate the [NodeIdentity], then it will return [None]. +pub fn node_identity_from_scrape(scrape: Scrape) -> Option<NodeIdentity> { + let node_key = scrape + .docs + .iter() + .find(|(_, key)| key == &"node") + .map(|(key, _)| key); + + let node_key = node_key?; + + let node_sample = scrape + .samples + .iter() + .find(|sample| &sample.metric == node_key); + + let node_sample = node_sample?; + + let public_key_string = node_sample.labels.get("key")?; + + let public_key = match BLSPubKey::from_str(public_key_string) { + Ok(public_key) => public_key, + Err(err) => { + tracing::info!("parsing public key failed: {}", err); + return None; + } + }; + + let mut node_identity = NodeIdentity::from_public_key(public_key); + populate_node_identity_from_scrape(&mut node_identity, scrape); + + Some(node_identity) +} + +/// [ProcessNodeIdentityUrlStreamTask] is a task that processes a stream of +/// [Url]s that are expected to contain a Node Identity. It will attempt to +/// retrieve the Node Identity from the [Url] and then send it to the [Sink] +/// provided. +pub struct ProcessNodeIdentityUrlStreamTask { + pub task_handle: Option<JoinHandle<()>>, +} + +impl ProcessNodeIdentityUrlStreamTask { + /// [new] creates a new [ProcessNodeIdentityUrlStreamTask] that processes a + /// stream of [Url]s that are expected to contain a Node Identity. + /// + /// Calling this function will spawn a new task that will start processing + /// immediately. The task's handle will be stored in the returned + /// state. + pub fn new<S, K>(url_receiver: S, node_identity_sender: K) -> Self + where + S: Stream<Item = Url> + Send + Sync + Unpin + 'static, + K: Sink<NodeIdentity, Error = SendError> + Clone + Send + Sync + Unpin + 'static, + { + let task_handle = async_std::task::spawn(Self::process_node_identity_url_stream( + url_receiver, + node_identity_sender, + )); + + Self { + task_handle: Some(task_handle), + } + } + + /// [process_node_identity_url_stream] processes a stream of [Url]s that are + /// expected to contain a Node Identity. It will attempt to retrieve the Node + /// Identity from the [Url] and then send it to the [Sink] provided. If the + /// [Sink] is closed, then the function will return. + async fn process_node_identity_url_stream<T, K>( + node_identity_url_stream: T, + node_identity_sink: K, + ) where + T: futures::Stream<Item = Url> + Unpin, + K: Sink<NodeIdentity, Error = SendError> + Unpin, + { + let mut node_identity_url_stream = node_identity_url_stream; + let mut node_identity_sender = node_identity_sink; + loop { + let node_identity_url_result = node_identity_url_stream.next().await; + let node_identity_url = match node_identity_url_result { + Some(node_identity_url) => node_identity_url, + None => { + tracing::info!("node identity url stream closed"); + return; + } + }; + + // Alright, we have a new Url to try and scrape for a Node Identity. + // Let's attempt to do that.
+ let node_identity_result = get_node_identity_from_url(node_identity_url).await; + + let node_identity = match node_identity_result { + Ok(node_identity) => node_identity, + Err(err) => { + tracing::warn!("get node identity from url failed (bad base url?): {}", err); + continue; + } + }; + + let send_result = node_identity_sender.send(node_identity).await; + if let Err(err) = send_result { + tracing::error!("node identity sender closed: {}", err); + + // We will be unable to provide any additional node identity + // updates. This is considered a critical error. + panic!("ProcessNodeIdentityUrlStreamTask node_identity_sender closed, future node identity information will stagnate: {}", err); + } + } + } +} + +/// [ProcessNodeIdentityUrlStreamTask] will cancel the task when it is dropped. +impl Drop for ProcessNodeIdentityUrlStreamTask { + fn drop(&mut self) { + let task_handle = self.task_handle.take(); + if let Some(task_handle) = task_handle { + async_std::task::block_on(task_handle.cancel()); + } + } +} + +#[cfg(test)] +mod tests { + use std::io::{BufRead, BufReader}; + + fn example_prometheus_output() -> &'static str { + include_str!("example_prometheus_metrics_output.txt") + } + + #[test] + fn test_prometheus_scraping_example() { + let example_input = example_prometheus_output(); + + let buffered_reader = BufReader::new(example_input.as_bytes()); + let lines = buffered_reader.lines(); + + let scrape_result = prometheus_parse::Scrape::parse(lines); + + assert!(scrape_result.is_ok()); + let scrape = scrape_result.unwrap(); + + let node_identity_general_key = scrape + .docs + .iter() + .find(|(_, key)| key == &"node_identity_general") + .map(|(key, _)| key); + let node_identity_location_key = scrape + .docs + .iter() + .find(|(_, key)| key == &"node_identity_location") + .map(|(key, _)| key); + + assert!(node_identity_general_key.is_some()); + assert!(node_identity_location_key.is_some()); + + let node_identity_general_key = node_identity_general_key.unwrap(); + let node_identity_location_key = node_identity_location_key.unwrap(); + + // Let's look for the general_info + let node_identity_general = scrape + .samples + .iter() + .find(|sample| &sample.metric == node_identity_general_key); + + let node_identity_location = scrape + .samples + .iter() + .find(|sample| &sample.metric == node_identity_location_key); + + assert!(node_identity_general.is_some()); + assert!(node_identity_location.is_some()); + + let node_identity_general = node_identity_general.unwrap(); + let node_identity_location = node_identity_location.unwrap(); + + assert_eq!( + node_identity_general.labels.get("company_name"), + Some("Espresso Systems") + ); + assert_eq!(node_identity_general.labels.get("name"), Some("sequencer0")); + assert_eq!( + node_identity_general.labels.get("network_type"), + Some("local") + ); + assert_eq!( + node_identity_general.labels.get("node_type"), + Some("espresso-sequencer 0.1") + ); + assert_eq!( + node_identity_general.labels.get("operating_system"), + Some("Linux 5.15.153.1") + ); + assert_eq!( + node_identity_general.labels.get("wallet"), + Some("0x0000000000000000000000000000000000000000") + ); + + assert_eq!(node_identity_location.labels.get("country"), Some("US")); + assert_eq!( + node_identity_location.labels.get("latitude"), + Some("-40.7128") + ); + assert_eq!( + node_identity_location.labels.get("longitude"), + Some("-74.0060") + ); + + print!("{:?}", scrape); + } + + #[test] + fn
test_node_identity_from_scrape() { + let example_input = example_prometheus_output(); + + let buffered_reader = BufReader::new(example_input.as_bytes()); + let lines = buffered_reader.lines(); + + let scrape_result = prometheus_parse::Scrape::parse(lines); + + assert!(scrape_result.is_ok()); + let scrape = scrape_result.unwrap(); + + let node_identity = super::node_identity_from_scrape(scrape); + + assert!(node_identity.is_some()); + let node_identity = node_identity.unwrap(); + + assert_eq!( + node_identity.company(), + &Some("Espresso Systems".to_string()) + ); + assert_eq!(node_identity.name(), &Some("sequencer0".to_string())); + assert_eq!(node_identity.network_type(), &Some("local".to_string())); + assert_eq!( + node_identity.node_type(), + &Some("espresso-sequencer 0.1".to_string()) + ); + assert_eq!( + node_identity.operating_system(), + &Some("Linux 5.15.153.1".to_string()) + ); + + assert!(node_identity.location().is_some()); + let node_identity_location = node_identity.location().unwrap(); + + assert_eq!(node_identity_location.country(), &Some("US".to_string())); + assert_eq!(node_identity_location.coords, Some((-40.7128, -74.0060))); + } +} diff --git a/node-metrics/src/api/node_validator/v0/node_validator.toml b/node-metrics/src/api/node_validator/v0/node_validator.toml new file mode 100644 index 000000000..6bf0383bd --- /dev/null +++ b/node-metrics/src/api/node_validator/v0/node_validator.toml @@ -0,0 +1,39 @@ +# Copyright (c) 2022 Espresso Systems (espressosys.com) +# This file is part of the HotShot Query Service library. +# +# This program is free software: you can redistribute it and/or modify it under the terms of the GNU +# General Public License as published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# You should have received a copy of the GNU General Public License along with this program. If not, +# see . + +[meta] +FORMAT_VERSION = "0.1.0" +NAME = "node-validator" +DESCRIPTION = """ +The node-validator API provides an endpoint that allows for the near real-time +streaming of the HotShot blockchain, and the sequencer nodes that are connected +and contributing to the HotShot blockchain. + +The data that is provided by this API can be used to construct a dashboard to +provide near real-time updates / views of the current blockchain state and +nodes. + +Additionally, this gives participating nodes the ability to be identified and +represented for public view, and transparency. +""" + +[route.details] +PATH = ["details"] +METHOD = "SOCKET" +DOC = """ +The details endpoint allows a client to opt-in to specific stream updates in a +single connection. All information that is provided by this endpoint is opt-in. +It will only provide what is requested across the WebSocket. + +Opens a WebSocket connection that will send events and responses to specifically +requested data. +""" diff --git a/node-metrics/src/lib.rs b/node-metrics/src/lib.rs new file mode 100644 index 000000000..bc5f498f8 --- /dev/null +++ b/node-metrics/src/lib.rs @@ -0,0 +1,303 @@ +// Copyright (c) 2022 Espresso Systems (espressosys.com) +// This file is part of the HotShot Query Service library. 
+// +// This program is free software: you can redistribute it and/or modify it under the terms of the GNU +// General Public License as published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without +// even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// You should have received a copy of the GNU General Public License along with this program. If not, +// see . + +//! # Node Validator Service +//! +//! The Node Validator Service is a general purpose relay service that watches +//! data flow from the HotShot protocol via the CDN pub sub service. It +//! maintains a local state of the network map and is able to relay the +//! stored details to any client that requests it. In addition it is also +//! able to provide individual state change updates to any client that +//! subscribes to that particular event stream. In order to provide identity +//! information to clients, that information must be volunteered by the nodes +//! in the network. This requires the nodes to be able to receive and respond +//! to these requests, and to relay their identity information to anyone who +//! requests it. +//! +//! ## Storage +//! +//! In order for this service to be effective and efficient, it needs to +//! store the state of the network in a fast and efficient manner. We are not expecting a lot of +//! data to be stored within this storage, but as things tend to grow and +//! change it may be necessary to have more robust storage mechanisms in +//! place, or even to have the ability to introduce new storage mechanisms. +//! To store the data effectively, we need +//! to ask a fundamental question: +//! +//! What states do we need to track? +//! 1. Node Information +//! a. Node Identity Information +//! b. Node State Information (specifically voter participation, latest block +//! information, and staking information) +//! 2. Network Information +//! a. Latest Block +//! b. The most recent N blocks (N assumed to be 50 at the moment) +//! - Information can be derived from these most recent 50 blocks +//! that allows us to derive histogram data, producer data, and +//! the most recent block information. We might be able to get away +//! with just storing the header information of these blocks, since we +//! don't need the full block data. +//! c. The most recent N vote participants +//! d. The top block producers over the latest N blocks +//! e. Histogram data for the latest N blocks +//! - Block Size +//! - Block Time +//! - Block Space Used +//! +//! ## Data Streams +//! +//! In order for clients to be able to receive the information from the node +//! validator service, we need to be able to facilitate requests. We could +//! simply start streaming data to the clients as soon as they connect, +//! however, this causes potential compatibility issues with the clients +//! in question. For example, if we want to add a new data stream that +//! can be retrieved for the client, and the client isn't expecting it, they +//! won't know how to handle the data, and it can potentially cause errors. +//! As such, it makes sense to only provide data streams when the client asks +//! for them. This allows for new features to be added to the data stream +//!
without breaking compatibility with the clients, provided that the existing +//! streams don't change in a way that would break the client. +//! +//! Starting out, there doesn't need to be a lot of data that needs to be +//! streamed to the client. In fact, we might be able to be a little +//! naive about this, and broadcast general objects in an event stream, as +//! data may be derivable from the objects that are broadcast. For example, +//! if we start out by sending the latest N block information, the client +//! may be able to derive histogram data from that information, which would +//! prevent us from having to send and store the histogram data. However, +//! there may be some pieces of data that are lacking from this approach which +//! would require us to send out additional data streams. +//! +//! Ideally, we should strive for a balance between the data we store locally +//! and the data that we stream to the clients. In order to know what we +//! need to store, we need to know what data we are expecting the client to +//! consume, and which data can be derived for these purposes. +//! +//! What Data Streams do we need to provide to clients? +//! +//! 1. Node Information +//! a. Node Identity Information +//! - Should be able to be sent in an initial batch +//! - Should be able to send individual updates as they occur +//! b. Node State Information +//! - Should be able to be sent in an initial batch +//! - Should be able to send individual updates as they occur +//! c. Block Information +//! - Should be able to be sent in an initial batch +//! - Should be able to send individual updates as they occur + +pub mod api; +pub mod service; + +use crate::{ + api::node_validator::v0::{ + cdn::{BroadcastRollCallTask, CdnReceiveMessagesTask}, + create_node_validator_api::{create_node_validator_processing, NodeValidatorConfig}, + HotshotQueryServiceLeafStreamRetriever, ProcessProduceLeafStreamTask, + StateClientMessageSender, STATIC_VER_0_1, + }, + service::{client_message::InternalClientMessage, server_message::ServerMessage}, +}; +use clap::Parser; +use espresso_types::{PubKey, SeqTypes}; +use futures::channel::mpsc::{self, Sender}; +use hotshot::traits::implementations::{ + CdnMetricsValue, CdnTopic, PushCdnNetwork, WrappedSignatureKey, +}; +use hotshot_query_service::metrics::PrometheusMetrics; +use hotshot_types::traits::signature_key::BuilderSignatureKey; +use tide_disco::App; +use url::Url; + +/// Options represents the configuration options that are available for running +/// the node validator service via the [run_standalone_service] function. +/// These options are configurable via command line arguments or environment +/// variables. +#[derive(Parser, Clone, Debug)] +pub struct Options { + /// stake_table_source_base_url is the base URL for the config API + /// endpoint that is provided by Espresso Sequencers. + /// + /// This endpoint is expected to point to the version root path of the + /// URL. + /// Example: + /// - https://query.cappuccino.testnet.espresso.network/v0/ + #[clap(long, env = "ESPRESSO_NODE_VALIDATOR_STAKE_TABLE_SOURCE_BASE_URL")] + stake_table_source_base_url: Url, + + /// leaf_stream_base_url is the base URL for the availability API endpoint + /// that is capable of providing a stream of leaf data. + /// + /// This endpoint is expected to point to the version root path of the + /// URL.
+ /// Example: + /// - https://query.cappuccino.testnet.espresso.network/v0/ + /// + #[clap(long, env = "ESPRESSO_NODE_VALIDATOR_LEAF_STREAM_SOURCE_BASE_URL")] + leaf_stream_base_url: Url, + + /// initial_node_public_base_urls is a list of URLs that are the initial + /// public base URLs of the nodes that are in the network. These can be + /// supplied as an initial source of URLs to scrape for node identity. + /// + /// These URLs are expected to point to the root path of the URL for the + /// node, and are expected to be URLs that support the status endpoint + /// for the nodes. + /// + /// Example URL: + /// - https://query-1.cappuccino.testnet.espresso.network/ + #[clap( + long, + env = "ESPRESSO_NODE_VALIDATOR_INITIAL_NODE_PUBLIC_BASE_URLS", + value_delimiter = ',' + )] + initial_node_public_base_urls: Vec<Url>, + + /// port is the port that the node validator service will listen on. + /// This port is expected to be a valid port number that is available + /// for the service to bind to. + #[clap( + long, + value_parser, + env = "ESPRESSO_NODE_VALIDATOR_PORT", + default_value = "9000" + )] + port: u16, + + /// cdn_marshal_endpoint is the endpoint for the CDN marshal service. + /// + /// This endpoint is optional, and if it is not provided, then the CDN + /// service will not be utilized. + #[clap(long, env = "ESPRESSO_NODE_VALIDATOR_CDN_MARSHAL_ENDPOINT")] + cdn_marshal_endpoint: Option<String>, +} + +impl Options { + fn stake_table_source_base_url(&self) -> &Url { + &self.stake_table_source_base_url + } + + fn leaf_stream_base_url(&self) -> &Url { + &self.leaf_stream_base_url + } + + fn initial_node_public_base_urls(&self) -> &[Url] { + &self.initial_node_public_base_urls + } + + fn port(&self) -> u16 { + self.port + } + + fn cdn_marshal_endpoint(&self) -> &Option<String> { + &self.cdn_marshal_endpoint + } +} + +/// MainState represents the State of the application that is available to +/// tide_disco. +struct MainState { + internal_client_message_sender: Sender<InternalClientMessage<Sender<ServerMessage>>>, +} + +impl StateClientMessageSender<Sender<ServerMessage>> for MainState { + fn sender(&self) -> Sender<InternalClientMessage<Sender<ServerMessage>>> { + self.internal_client_message_sender.clone() + } +}
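Since `Options` derives `clap::Parser`, it can also be built from explicit arguments, which is convenient in tests. A sketch, not part of this diff; the URLs are placeholders, and clap will equally read the `ESPRESSO_NODE_VALIDATOR_*` environment variables declared on each field:

```rust
use clap::Parser;

// Hypothetical invocation; the first element is the binary name.
let options = Options::parse_from([
    "node-metrics",
    "--stake-table-source-base-url",
    "https://query.example.com/v0/",
    "--leaf-stream-base-url",
    "https://query.example.com/v0/",
    // value_delimiter = ',' lets one flag carry several URLs.
    "--initial-node-public-base-urls",
    "https://node-1.example.com/,https://node-2.example.com/",
    "--port",
    "9000",
]);
```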
+ +/// Run the service by itself. +/// +/// This function will run the node validator as its own service. It has some +/// options that allow it to be configured in order for it to operate +/// effectively. +pub async fn run_standalone_service(options: Options) { + let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(32); + let state = MainState { + internal_client_message_sender, + }; + + let mut app: App<_, api::node_validator::v0::Error> = App::with_state(state); + let node_validator_api = + api::node_validator::v0::define_api().expect("error defining node validator api"); + + match app.register_module("node-validator", node_validator_api) { + Ok(_) => {} + Err(err) => { + panic!("error registering node validator api: {:?}", err); + } + } + + let (leaf_sender, leaf_receiver) = mpsc::channel(10); + + let _process_consume_leaves = ProcessProduceLeafStreamTask::new( + HotshotQueryServiceLeafStreamRetriever::new(options.leaf_stream_base_url().clone()), + leaf_sender, + ); + + let node_validator_task_state = match create_node_validator_processing( + NodeValidatorConfig { + stake_table_url_base: options.stake_table_source_base_url().clone(), + initial_node_public_base_urls: options.initial_node_public_base_urls().to_vec(), + }, + internal_client_message_receiver, + leaf_receiver, + ) + .await + { + Ok(node_validator_task_state) => node_validator_task_state, + + Err(err) => { + panic!("error creating node validator processing: {:?}", err); + } + }; + + let _cdn_tasks = if let Some(cdn_broker_url_string) = options.cdn_marshal_endpoint() { + let (public_key, private_key) = PubKey::generated_from_seed_indexed([1; 32], 0); + let cdn_network_result = PushCdnNetwork::<PubKey>::new( + cdn_broker_url_string.to_string(), + vec![CdnTopic::Global], + hotshot::traits::implementations::KeyPair { + public_key: WrappedSignatureKey(public_key), + private_key: private_key.clone(), + }, + CdnMetricsValue::new(&PrometheusMetrics::default()), + ); + let cdn_network = match cdn_network_result { + Ok(cdn_network) => cdn_network, + Err(err) => { + panic!("error creating cdn network: {:?}", err); + } + }; + + let url_sender = node_validator_task_state.url_sender.clone(); + + let broadcast_cdn_network = cdn_network.clone(); + let cdn_receive_message_task = CdnReceiveMessagesTask::new(cdn_network, url_sender); + let broadcast_roll_call_task = + BroadcastRollCallTask::new(broadcast_cdn_network, public_key); + + Some((broadcast_roll_call_task, cdn_receive_message_task)) + } else { + None + }; + + let port = options.port(); + // We would like to wait until being signaled + let app_serve_handle = async_std::task::spawn(async move { + let app_serve_result = app.serve(format!("0.0.0.0:{}", port), STATIC_VER_0_1).await; + tracing::info!("app serve result: {:?}", app_serve_result); + }); + + app_serve_handle.await; +} diff --git a/node-metrics/src/main.rs b/node-metrics/src/main.rs new file mode 100644 index 000000000..0b02ee0e3 --- /dev/null +++ b/node-metrics/src/main.rs @@ -0,0 +1,11 @@ +use async_compatibility_layer::logging::{setup_backtrace, setup_logging}; +use clap::Parser; +use node_metrics::{run_standalone_service, Options}; + +#[async_std::main] +async fn main() { + setup_logging(); + setup_backtrace(); + + run_standalone_service(Options::parse()).await; +} diff --git a/node-metrics/src/service/client_id/mod.rs b/node-metrics/src/service/client_id/mod.rs new file mode 100644 index 000000000..11353b6e5 --- /dev/null +++ b/node-metrics/src/service/client_id/mod.rs @@ -0,0 +1,164 @@ +use serde::{Deserialize, Serialize}; +use std::ops::{Add, AddAssign}; + +/// [ClientId] represents the unique identifier for a client that is connected +/// to the server.
+/// +/// Example: +/// ```rust +/// # use node_metrics::service::client_id::ClientId; +/// +/// let client_id = ClientId::from_count(1); +/// +/// # assert_eq!(ClientId::from_count(1), client_id); +/// let client_id_2 = client_id + 1; +/// +/// # assert_ne!(client_id, client_id_2); +/// +/// let mut client_id_3 = client_id; +/// client_id_3 += 1; +/// +/// # assert_eq!(client_id_2, client_id_3); +/// ``` +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct ClientId(u64); + +impl ClientId { + pub fn from_count(count: u64) -> Self { + ClientId(count) + } +} + +/// [Add] implements basic addition for [ClientId], which allows [u64]s to be +/// added to the [ClientId] for convenience. +/// +/// Example: +/// +/// ```rust +/// +/// # use node_metrics::service::client_id::ClientId; +/// +/// let client_id = ClientId::from_count(1); +/// let new_client_id = client_id + 1; +/// +/// # assert_eq!(ClientId::from_count(2), new_client_id); +/// # assert_ne!(client_id, new_client_id); +/// ``` +impl Add for ClientId { + type Output = Self; + + fn add(self, rhs: u64) -> Self::Output { + ClientId(self.0 + rhs) + } +} + +/// [AddAssign] implements basic addition for [ClientId], which allows [u64]s to +/// be added to the mutable [ClientId] for convenience. +/// +/// Example: +/// +/// ```rust +/// # use node_metrics::service::client_id::ClientId; +/// +/// let mut client_id = ClientId::from_count(1); +/// client_id += 1; +/// +/// # assert_eq!(ClientId::from_count(2), client_id); +/// ``` +impl AddAssign for ClientId { + fn add_assign(&mut self, rhs: u64) { + self.0 += rhs; + } +} + +#[cfg(test)] +mod tests { + use super::ClientId; + + #[test] + fn test_client_id_debug() { + let client_id = ClientId::from_count(1); + assert_eq!(format!("{:?}", client_id), "ClientId(1)"); + } + + #[test] + #[allow(clippy::clone_on_copy)] + fn test_client_id_clone() { + let client_id = ClientId::from_count(1); + let cloned_client_id = client_id.clone(); + assert_eq!(client_id, cloned_client_id); + } + + #[test] + fn test_client_id_partial_eq() { + let client_id_1 = ClientId::from_count(1); + let client_id_2 = ClientId::from_count(2); + let client_id_3 = ClientId::from_count(1); + + assert_ne!(client_id_1, client_id_2); + assert_eq!(client_id_1, client_id_3); + } + + #[test] + fn test_client_id_eq() { + let client_id_1 = ClientId::from_count(1); + + client_id_1.assert_receiver_is_total_eq(); + } + + #[test] + fn test_hash() { + use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; + + let hash_1 = { + let client_id = ClientId::from_count(1); + let mut hasher = DefaultHasher::new(); + client_id.hash(&mut hasher); + hasher.finish() + }; + + let hash_2 = { + let client_id = ClientId::from_count(2); + let mut hasher = DefaultHasher::new(); + client_id.hash(&mut hasher); + hasher.finish() + }; + + let hash_3 = { + let client_id = ClientId::from_count(1); + let mut hasher = DefaultHasher::new(); + client_id.hash(&mut hasher); + hasher.finish() + }; + + assert_eq!(hash_1, hash_3); + assert_ne!(hash_1, hash_2); + assert_ne!(hash_2, hash_3); + } + + #[test] + fn test_add() { + let client_id = ClientId::from_count(1); + let new_client_id = client_id + 1; + assert_eq!(new_client_id, ClientId::from_count(2)); + } + + #[test] + fn test_add_assign() { + let mut client_id = ClientId::from_count(1); + client_id += 1; + assert_eq!(client_id, ClientId::from_count(2)); + } + + #[test] + #[cfg(feature = "testing")] + fn test_serialization() { + use serde_json; + let client_id = 
ClientId::from_count(1); + let serialized = serde_json::to_string(&client_id).unwrap(); + let deserialized: ClientId = serde_json::from_str(&serialized).unwrap(); + + assert_eq!(deserialized, client_id); + } +} diff --git a/node-metrics/src/service/client_message/mod.rs b/node-metrics/src/service/client_message/mod.rs new file mode 100644 index 000000000..d19881430 --- /dev/null +++ b/node-metrics/src/service/client_message/mod.rs @@ -0,0 +1,229 @@ +use super::client_id::ClientId; +use serde::{Deserialize, Serialize}; + +/// [ClientMessage] represents the messages that the client can send to the +/// server for a request. +#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)] +pub enum ClientMessage { + SubscribeLatestBlock, + SubscribeNodeIdentity, + SubscribeVoters, + + RequestBlocksSnapshot, + RequestNodeIdentitySnapshot, + RequestHistogramSnapshot, + RequestVotersSnapshot, +} + +/// [InternalClientMessage] represents the requests that a client can send to +/// the server, so that the server can send back the responses that correspond +/// to each request. +#[derive(Debug)] +pub enum InternalClientMessage<K> { + Connected(K), + Disconnected(ClientId), + + Request(ClientId, ClientMessage), +} + +impl ClientMessage { + /// [to_internal_with_client_id] converts the [ClientMessage] into an + /// [InternalClientMessage] with the given [ClientId]. + pub fn to_internal_with_client_id<K>(&self, client_id: ClientId) -> InternalClientMessage<K> { + InternalClientMessage::Request(client_id, *self) + } +}
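A quick sketch of the conversion, mirroring how the socket handler in `define_api` uses it; `example_conversion` is a hypothetical helper, and the concrete `K` matches the per-client channel the server uses to push `ServerMessage`s back out:

```rust
use futures::channel::mpsc::Sender;
use node_metrics::service::{
    client_id::ClientId,
    client_message::{ClientMessage, InternalClientMessage},
    server_message::ServerMessage,
};

fn example_conversion() -> InternalClientMessage<Sender<ServerMessage>> {
    let client_id = ClientId::from_count(7);
    // The subscription request is tagged with the id the server assigned us.
    ClientMessage::SubscribeLatestBlock.to_internal_with_client_id(client_id)
}
```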
+ +#[cfg(test)] +mod tests { + use super::InternalClientMessage; + use super::*; + use crate::service::server_message::ServerMessage; + use futures::channel::mpsc::Sender; + use std::iter::zip; + + impl<K> PartialEq for InternalClientMessage<K> { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + // We don't care about the [Sender] here, as it is unable to be + // compared. + (Self::Connected(_), Self::Connected(_)) => true, + (Self::Disconnected(lhs), Self::Disconnected(rhs)) => lhs == rhs, + ( + Self::Request(lhs_client_id, lhs_message), + Self::Request(rhs_client_id, rhs_message), + ) => lhs_client_id == rhs_client_id && lhs_message == rhs_message, + _ => false, + } + } + } + + #[test] + fn test_client_message_partial_eq() { + let messages = [ + ClientMessage::SubscribeLatestBlock, + ClientMessage::SubscribeNodeIdentity, + ClientMessage::SubscribeVoters, + ClientMessage::RequestBlocksSnapshot, + ClientMessage::RequestNodeIdentitySnapshot, + ClientMessage::RequestHistogramSnapshot, + ]; + + for (l, r) in zip(messages.iter(), messages.iter()) { + assert_eq!(l, r); + } + + for i in 1..messages.len() { + for (l, r) in zip( + messages.iter(), + messages.iter().skip(i).chain(messages.iter().take(i)), + ) { + assert_ne!(l, r); + } + } + } + + #[test] + fn test_client_message_debug() { + let messages = [ + ClientMessage::SubscribeLatestBlock, + ClientMessage::SubscribeNodeIdentity, + ClientMessage::SubscribeVoters, + ClientMessage::RequestBlocksSnapshot, + ClientMessage::RequestNodeIdentitySnapshot, + ClientMessage::RequestHistogramSnapshot, + ]; + + for message in messages.iter() { + assert_eq!(format!("{:?}", message), format!("{:?}", message)); + } + } + + #[test] + #[cfg(feature = "testing")] + fn test_client_message_serialize() { + use serde_json; + + let messages = [ + ClientMessage::SubscribeLatestBlock, + ClientMessage::SubscribeNodeIdentity, + ClientMessage::SubscribeVoters, + ClientMessage::RequestBlocksSnapshot, + ClientMessage::RequestNodeIdentitySnapshot, + ClientMessage::RequestHistogramSnapshot, + ]; + + for message in messages.iter() { + let serialized = serde_json::to_string(message).unwrap(); + let deserialized: ClientMessage = serde_json::from_str(&serialized).unwrap(); + assert_eq!(*message, deserialized); + } + } + + #[test] + fn test_client_message_to_internal_with_client_id() { + let messages = [ + ClientMessage::SubscribeLatestBlock, + ClientMessage::SubscribeNodeIdentity, + ClientMessage::SubscribeVoters, + ClientMessage::RequestBlocksSnapshot, + ClientMessage::RequestNodeIdentitySnapshot, + ClientMessage::RequestHistogramSnapshot, + ]; + + for message in messages { + for i in 0..10 { + let client_id = ClientId::from_count(i); + let internal_client_message = + message.to_internal_with_client_id::<Sender<ServerMessage>>(client_id); + match internal_client_message { + InternalClientMessage::Request(id, _) => { + assert_eq!(id, client_id); + } + _ => panic!("Unexpected InternalClientMessage"), + } + } + } + } + + #[test] + fn test_internal_client_message_partial_eq() { + let (sender, _) = futures::channel::mpsc::channel::<ServerMessage>(1); + let messages = [ + InternalClientMessage::Connected(sender), + InternalClientMessage::Disconnected(ClientId::from_count(1)), + InternalClientMessage::Request( + ClientId::from_count(1), + ClientMessage::SubscribeLatestBlock, + ), + InternalClientMessage::Request( + ClientId::from_count(1), + ClientMessage::SubscribeNodeIdentity, + ), + InternalClientMessage::Request(ClientId::from_count(1), ClientMessage::SubscribeVoters), + InternalClientMessage::Request( + ClientId::from_count(1), + ClientMessage::RequestBlocksSnapshot, + ), + InternalClientMessage::Request( + ClientId::from_count(1), + ClientMessage::RequestNodeIdentitySnapshot, + ), + InternalClientMessage::Request( + ClientId::from_count(1), + ClientMessage::RequestHistogramSnapshot, + ), + ]; + + for (l, r) in zip(messages.iter(), messages.iter()) { + assert_eq!(l, r); + }
+ for i in 1..messages.len() { + for (l, r) in zip( + messages.iter(), + messages.iter().skip(i).chain(messages.iter().take(i)), + ) { + assert_ne!(l, r); + } + } + + for j in 2..12 { + let iter_messages = [ + InternalClientMessage::Disconnected(ClientId::from_count(j)), + InternalClientMessage::Request( + ClientId::from_count(j), + ClientMessage::SubscribeLatestBlock, + ), + InternalClientMessage::Request( + ClientId::from_count(j), + ClientMessage::SubscribeNodeIdentity, + ), + InternalClientMessage::Request( + ClientId::from_count(j), + ClientMessage::SubscribeVoters, + ), + InternalClientMessage::Request( + ClientId::from_count(j), + ClientMessage::RequestBlocksSnapshot, + ), + InternalClientMessage::Request( + ClientId::from_count(j), + ClientMessage::RequestNodeIdentitySnapshot, + ), + InternalClientMessage::Request( + ClientId::from_count(j), + ClientMessage::RequestHistogramSnapshot, + ), + ]; + + // We skip the first message, as we don't want to include the + // Connected message. + for (l, r) in zip(messages.iter().skip(1), iter_messages.iter()) { + assert_ne!(l, r); + } + } + } +} diff --git a/node-metrics/src/service/client_state/mod.rs b/node-metrics/src/service/client_state/mod.rs new file mode 100644 index 000000000..53092c13a --- /dev/null +++ b/node-metrics/src/service/client_state/mod.rs @@ -0,0 +1,2014 @@ +use super::{ + client_id::ClientId, + client_message::{ClientMessage, InternalClientMessage}, + data_state::{DataState, NodeIdentity}, + server_message::ServerMessage, +}; +use async_std::{ + sync::{RwLock, RwLockWriteGuard}, + task::JoinHandle, +}; +use bitvec::vec::BitVec; +use espresso_types::SeqTypes; +use futures::{channel::mpsc::SendError, Sink, SinkExt, Stream, StreamExt}; +use hotshot_query_service::explorer::{BlockDetail, ExplorerHistograms}; +use std::{ + collections::{HashMap, HashSet}, + sync::Arc, +}; + +/// [ClientState] represents the server-side state for a single connected +/// client: its [ClientId] and the sender used to push messages to it. +// This state is meant to be managed in a separate thread that assists with +// processing and updating of individual client states. +pub struct ClientState<K> { + client_id: ClientId, + sender: K, +} + +impl<K> ClientState<K> { + /// Create a new ClientState with the given client_id and sender. + pub fn new(client_id: ClientId, sender: K) -> Self { + Self { client_id, sender } + } + + pub fn client_id(&self) -> ClientId { + self.client_id + } + + pub fn sender(&self) -> &K { + &self.sender + } +} + +/// [ClientThreadState] represents the state of all of the active client +/// connections connected to the service. This state governs which clients +/// are connected, and what subscriptions they have set up.
+pub struct ClientThreadState<K> {
+    clients: HashMap<ClientId, ClientState<K>>,
+    subscribed_latest_block: HashSet<ClientId>,
+    subscribed_node_identity: HashSet<ClientId>,
+    subscribed_voters: HashSet<ClientId>,
+    connection_id_counter: ClientId,
+}
+
+impl<K> ClientThreadState<K> {
+    pub fn new(
+        clients: HashMap<ClientId, ClientState<K>>,
+        subscribed_latest_block: HashSet<ClientId>,
+        subscribed_node_identity: HashSet<ClientId>,
+        subscribed_voters: HashSet<ClientId>,
+        connection_id_counter: ClientId,
+    ) -> Self {
+        Self {
+            clients,
+            subscribed_latest_block,
+            subscribed_node_identity,
+            subscribed_voters,
+            connection_id_counter,
+        }
+    }
+}
+
+/// [drop_client_client_thread_state_write_guard] is a utility function for
+/// cleaning up the [ClientThreadState]
+fn drop_client_client_thread_state_write_guard<K>(
+    client_id: &ClientId,
+    client_thread_state_write_guard: &mut RwLockWriteGuard<'_, ClientThreadState<K>>,
+) -> Option<ClientState<K>> {
+    let client = client_thread_state_write_guard.clients.remove(client_id);
+    client_thread_state_write_guard
+        .subscribed_latest_block
+        .remove(client_id);
+    client_thread_state_write_guard
+        .subscribed_node_identity
+        .remove(client_id);
+    client_thread_state_write_guard
+        .subscribed_voters
+        .remove(client_id);
+
+    client
+}
+
+/// [drop_client_no_lock_guard] is a utility function for cleaning up the [ClientThreadState]
+/// when a client is detected as disconnected.
+async fn drop_client_no_lock_guard<K>(
+    client_id: &ClientId,
+    client_thread_state: Arc<RwLock<ClientThreadState<K>>>,
+) -> Option<ClientState<K>> {
+    let mut client_thread_state_write_lock_guard = client_thread_state.write().await;
+
+    drop_client_client_thread_state_write_guard(
+        client_id,
+        &mut client_thread_state_write_lock_guard,
+    )
+}
+
+/// [HandleConnectedError] represents the scope of errors that can be
+/// returned from the [handle_client_message_connected] function.
+#[derive(Debug)]
+pub enum HandleConnectedError {
+    ClientSendError(SendError),
+}
+
+impl std::fmt::Display for HandleConnectedError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            HandleConnectedError::ClientSendError(err) => {
+                write!(f, "handle connected error: client send error: {}", err)
+            }
+        }
+    }
+}
+
+impl std::error::Error for HandleConnectedError {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        match self {
+            HandleConnectedError::ClientSendError(err) => Some(err),
+        }
+    }
+}
+
+/// [handle_client_message_connected] is a function that processes the client
+/// message to connect a client to the service.
+pub async fn handle_client_message_connected<K>(
+    mut sender: K,
+    client_thread_state: Arc<RwLock<ClientThreadState<K>>>,
+) -> Result<ClientId, HandleConnectedError>
+where
+    K: Sink<ServerMessage, Error = SendError> + Clone + Unpin,
+{
+    let mut client_thread_state_write_lock_guard = client_thread_state.write().await;
+
+    client_thread_state_write_lock_guard.connection_id_counter += 1;
+    let client_id = client_thread_state_write_lock_guard.connection_id_counter;
+
+    client_thread_state_write_lock_guard.clients.insert(
+        client_id,
+        ClientState {
+            client_id,
+            sender: sender.clone(),
+        },
+    );
+
+    // Explicitly unlock
+    drop(client_thread_state_write_lock_guard);
+
+    // Send the client their new id.
+    if let Err(err) = sender.send(ServerMessage::YouAre(client_id)).await {
+        // We need to drop the client now.
+        drop_client_no_lock_guard(&client_id, client_thread_state.clone()).await;
+        return Err(HandleConnectedError::ClientSendError(err));
+    }
+
+    Ok(client_id)
+}
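+// A minimal usage sketch of the connect flow above (illustrative only; the
+// channel capacity and the surrounding wiring are assumptions, not part of
+// this module):
+//
+//     let state = Arc::new(RwLock::new(ClientThreadState::new(
+//         HashMap::new(),
+//         HashSet::new(),
+//         HashSet::new(),
+//         HashSet::new(),
+//         ClientId::from_count(1),
+//     )));
+//     let (sender, mut receiver) = futures::channel::mpsc::channel(16);
+//     let client_id = handle_client_message_connected(sender, state.clone()).await?;
+//     // The first message a client observes is its own id.
+//     assert_eq!(receiver.next().await, Some(ServerMessage::YouAre(client_id)));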
+/// [handle_client_message_disconnected] is a function that processes the client
+/// message to disconnect a client from the service.
+pub async fn handle_client_message_disconnected<K>(
+    client_id: ClientId,
+    client_thread_state: Arc<RwLock<ClientThreadState<K>>>,
+) {
+    // We might receive an implicit disconnect when attempting to
+    // send a message, as the receiving channel might be closed.
+    drop_client_no_lock_guard(&client_id, client_thread_state.clone()).await;
+}
+
+/// [handle_client_message_subscribe_latest_block] is a function that processes
+/// the client message to subscribe to the latest block stream.
+pub async fn handle_client_message_subscribe_latest_block<K>(
+    client_id: ClientId,
+    client_thread_state: Arc<RwLock<ClientThreadState<K>>>,
+) {
+    let mut client_thread_state_write_lock_guard = client_thread_state.write().await;
+
+    client_thread_state_write_lock_guard
+        .subscribed_latest_block
+        .insert(client_id);
+
+    // Explicitly unlock
+    drop(client_thread_state_write_lock_guard);
+}
+
+/// [handle_client_message_subscribe_node_identity] is a function that processes
+/// the client message to subscribe to the node identity stream.
+pub async fn handle_client_message_subscribe_node_identity<K>(
+    client_id: ClientId,
+    client_thread_state: Arc<RwLock<ClientThreadState<K>>>,
+) {
+    let mut client_thread_state_write_lock_guard = client_thread_state.write().await;
+
+    client_thread_state_write_lock_guard
+        .subscribed_node_identity
+        .insert(client_id);
+
+    // Explicitly unlock
+    drop(client_thread_state_write_lock_guard);
+}
+
+/// [handle_client_message_subscribe_voters] is a function that processes
+/// the client message to subscribe to the voters bitvecs.
+pub async fn handle_client_message_subscribe_voters<K>(
+    client_id: ClientId,
+    client_thread_state: Arc<RwLock<ClientThreadState<K>>>,
+) {
+    let mut client_thread_state_write_lock_guard = client_thread_state.write().await;
+
+    client_thread_state_write_lock_guard
+        .subscribed_voters
+        .insert(client_id);
+
+    // Explicitly unlock
+    drop(client_thread_state_write_lock_guard);
+}
+
+/// [HandleRequestBlocksSnapshotsError] represents the scope of errors that can
+/// be returned from the [handle_client_message_request_blocks_snapshot] function.
+#[derive(Debug)]
+pub enum HandleRequestBlocksSnapshotsError {
+    ClientSendError(SendError),
+}
+
+impl std::fmt::Display for HandleRequestBlocksSnapshotsError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            HandleRequestBlocksSnapshotsError::ClientSendError(err) => {
+                write!(
+                    f,
+                    "handle request blocks snapshot error: client send error: {}",
+                    err
+                )
+            }
+        }
+    }
+}
+
+impl std::error::Error for HandleRequestBlocksSnapshotsError {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        match self {
+            HandleRequestBlocksSnapshotsError::ClientSendError(err) => Some(err),
+        }
+    }
+}
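+// Sketch of the subscribe flow from a client's point of view (illustrative
+// only; `state` and the channel wiring are assumed to be set up as in the
+// connect sketch above):
+//
+//     handle_client_message_subscribe_latest_block(client_id, state.clone()).await;
+//     // From now on, every block distributed by the service shows up as a
+//     // ServerMessage::LatestBlock(..) on this client's receiver.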
+/// [handle_client_message_request_blocks_snapshot] is a function that processes
+/// the client message request for a blocks snapshot.
+pub async fn handle_client_message_request_blocks_snapshot<K>(
+    client_id: ClientId,
+    data_state: Arc<RwLock<DataState>>,
+    client_thread_state: Arc<RwLock<ClientThreadState<K>>>,
+) -> Result<(), HandleRequestBlocksSnapshotsError>
+where
+    K: Sink<ServerMessage, Error = SendError> + Clone + Unpin,
+{
+    let (client_thread_state_read_lock_guard, data_state_read_lock_guard) =
+        futures::join!(client_thread_state.read(), data_state.read());
+
+    let latest_blocks = data_state_read_lock_guard
+        .latest_blocks()
+        .map(|block| BlockDetail {
+            hash: block.hash,
+            proposer_id: block.proposer_id.clone(),
+            height: block.height,
+            size: block.size,
+            time: block.time,
+            num_transactions: block.num_transactions,
+            fee_recipient: block.fee_recipient.clone(),
+            block_reward: block.block_reward.clone(),
+        })
+        .collect::<Vec<BlockDetail<SeqTypes>>>();
+
+    if let Some(client) = client_thread_state_read_lock_guard.clients.get(&client_id) {
+        let mut sender = client.sender.clone();
+        if let Err(err) = sender
+            .send(ServerMessage::BlocksSnapshot(Arc::new(latest_blocks)))
+            .await
+        {
+            // Drop the read lock before removing the client, since the
+            // cleanup path below takes the write lock.
+            drop(client_thread_state_read_lock_guard);
+            drop_client_no_lock_guard(&client_id, client_thread_state.clone()).await;
+            return Err(HandleRequestBlocksSnapshotsError::ClientSendError(err));
+        }
+    }
+
+    Ok(())
+}
+
+/// [HandleRequestNodeIdentitySnapshotError] represents the scope of errors that
+/// can be returned from the [handle_client_message_request_node_identity_snapshot]
+/// function.
+#[derive(Debug)]
+pub enum HandleRequestNodeIdentitySnapshotError {
+    ClientSendError(SendError),
+}
+
+impl std::fmt::Display for HandleRequestNodeIdentitySnapshotError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            HandleRequestNodeIdentitySnapshotError::ClientSendError(err) => {
+                write!(
+                    f,
+                    "handle request node identity snapshot error: client send error: {}",
+                    err
+                )
+            }
+        }
+    }
+}
+
+impl std::error::Error for HandleRequestNodeIdentitySnapshotError {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        match self {
+            HandleRequestNodeIdentitySnapshotError::ClientSendError(err) => Some(err),
+        }
+    }
+}
+
+/// [handle_client_message_request_node_identity_snapshot] is a function that
+/// processes the client message request for a node identity snapshot.
+pub async fn handle_client_message_request_node_identity_snapshot<K>(
+    client_id: ClientId,
+    data_state: Arc<RwLock<DataState>>,
+    client_thread_state: Arc<RwLock<ClientThreadState<K>>>,
+) -> Result<(), HandleRequestNodeIdentitySnapshotError>
+where
+    K: Sink<ServerMessage, Error = SendError> + Clone + Unpin,
+{
+    // Let's send the current node identity snapshot to the client
+    let (client_thread_state_read_lock_guard, data_state_read_lock_guard) =
+        futures::join!(client_thread_state.read(), data_state.read());
+    let client_result = client_thread_state_read_lock_guard.clients.get(&client_id);
+    if let Some(client) = client_result {
+        let mut sender = client.sender.clone();
+
+        // Let's copy the current node identity snapshot and send them
+        let nodes = data_state_read_lock_guard
+            .node_identity()
+            .cloned()
+            .collect::<Vec<_>>();
+
+        if let Err(err) = sender
+            .send(ServerMessage::NodeIdentitySnapshot(Arc::new(nodes)))
+            .await
+        {
+            drop(client_thread_state_read_lock_guard);
+            drop_client_no_lock_guard(&client_id, client_thread_state.clone()).await;
+            return Err(HandleRequestNodeIdentitySnapshotError::ClientSendError(err));
+        }
+
+        return Ok(());
+    }
+
+    Ok(())
+}
+
+/// [HandleRequestHistogramSnapshotError] represents the scope of errors that
+/// can be returned from the [handle_client_message_request_histogram_snapshot]
+/// function.
+#[derive(Debug)] +pub enum HandleRequestHistogramSnapshotError { + ClientSendError(SendError), +} + +impl std::fmt::Display for HandleRequestHistogramSnapshotError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + HandleRequestHistogramSnapshotError::ClientSendError(err) => { + write!( + f, + "handle request histogram snapshot error: client send error: {}", + err + ) + } + } + } +} + +impl std::error::Error for HandleRequestHistogramSnapshotError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + HandleRequestHistogramSnapshotError::ClientSendError(err) => Some(err), + } + } +} + +/// [handle_client_message_request_histogram_snapshot] is a function that +/// processes the client message request for a histogram snapshot. +pub async fn handle_client_message_request_histogram_snapshot( + client_id: ClientId, + data_state: Arc>, + client_thread_state: Arc>>, +) -> Result<(), HandleRequestHistogramSnapshotError> +where + K: Sink + Clone + Unpin, +{ + // Let's send the current histogram data snapshot to the client + let (client_thread_state_read_lock_guard, data_state_read_lock_guard) = + futures::join!(client_thread_state.read(), data_state.read()); + + let histogram_data = ExplorerHistograms { + block_size: data_state_read_lock_guard + .latest_blocks() + .skip(1) + .map(|block| block.size) + .collect(), + block_time: data_state_read_lock_guard + .latest_blocks() + .skip(1) + .zip(data_state_read_lock_guard.latest_blocks()) + .map(|(block_i, block_i_sub_1)| { + (block_i.time.0 - block_i_sub_1.time.0).whole_seconds() as u64 + }) + .collect(), + block_transactions: data_state_read_lock_guard + .latest_blocks() + .skip(1) + .map(|block| block.num_transactions) + .collect(), + block_heights: data_state_read_lock_guard + .latest_blocks() + .skip(1) + .map(|block| block.height) + .collect(), + }; + let arc_histogram_data = Arc::new(histogram_data); + drop(data_state_read_lock_guard); + + if let Some(client) = client_thread_state_read_lock_guard.clients.get(&client_id) { + let mut sender = client.sender.clone(); + drop(client_thread_state_read_lock_guard); + + if let Err(err) = sender + .send(ServerMessage::HistogramSnapshot(arc_histogram_data)) + .await + { + drop_client_no_lock_guard(&client_id, client_thread_state.clone()).await; + return Err(HandleRequestHistogramSnapshotError::ClientSendError(err)); + } + + return Ok(()); + } + + Ok(()) +} + +#[derive(Debug)] +pub enum HandleRequestVotersSnapshotError { + ClientSendError(SendError), +} + +impl std::fmt::Display for HandleRequestVotersSnapshotError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + HandleRequestVotersSnapshotError::ClientSendError(err) => { + write!( + f, + "handle request voters snapshot error: client send error: {}", + err + ) + } + } + } +} + +impl std::error::Error for HandleRequestVotersSnapshotError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + HandleRequestVotersSnapshotError::ClientSendError(err) => Some(err), + } + } +} + +/// [handle_client_message_request_voters_snapshot] is a function that processes +/// the client message request for a voters snapshot. 
+pub async fn handle_client_message_request_voters_snapshot( + client_id: ClientId, + data_state: Arc>, + client_thread_state: Arc>>, +) -> Result<(), HandleRequestVotersSnapshotError> +where + K: Sink + Clone + Unpin, +{ + let (client_thread_state_read_lock_guard, data_state_read_lock_guard) = + futures::join!(client_thread_state.read(), data_state.read()); + + let voters_data = data_state_read_lock_guard + .latest_voters() + .cloned() + .collect::>(); + + let voters_data = Arc::new(voters_data); + + if let Some(client) = client_thread_state_read_lock_guard.clients.get(&client_id) { + let mut sender = client.sender.clone(); + drop(client_thread_state_read_lock_guard); + + if let Err(err) = sender + .send(ServerMessage::VotersSnapshot(voters_data.clone())) + .await + { + drop_client_no_lock_guard(&client_id, client_thread_state.clone()).await; + return Err(HandleRequestVotersSnapshotError::ClientSendError(err)); + } + + return Ok(()); + } + Ok(()) +} + +/// [ProcessClientMessageError] represents the scope of errors that can be +/// returned from the [process_client_message] function. +#[derive(Debug)] +pub enum ProcessClientMessageError { + Connected(HandleConnectedError), + BlocksSnapshot(HandleRequestBlocksSnapshotsError), + NodeIdentitySnapshot(HandleRequestNodeIdentitySnapshotError), + HistogramSnapshot(HandleRequestHistogramSnapshotError), + VotersSnapshot(HandleRequestVotersSnapshotError), +} + +impl From for ProcessClientMessageError { + fn from(err: HandleConnectedError) -> Self { + ProcessClientMessageError::Connected(err) + } +} + +impl From for ProcessClientMessageError { + fn from(err: HandleRequestBlocksSnapshotsError) -> Self { + ProcessClientMessageError::BlocksSnapshot(err) + } +} + +impl From for ProcessClientMessageError { + fn from(err: HandleRequestNodeIdentitySnapshotError) -> Self { + ProcessClientMessageError::NodeIdentitySnapshot(err) + } +} + +impl From for ProcessClientMessageError { + fn from(err: HandleRequestHistogramSnapshotError) -> Self { + ProcessClientMessageError::HistogramSnapshot(err) + } +} + +impl From for ProcessClientMessageError { + fn from(err: HandleRequestVotersSnapshotError) -> Self { + ProcessClientMessageError::VotersSnapshot(err) + } +} + +impl std::fmt::Display for ProcessClientMessageError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ProcessClientMessageError::Connected(err) => { + write!(f, "process client message error: connected: {}", err) + } + ProcessClientMessageError::BlocksSnapshot(err) => { + write!(f, "process client message error: blocks snapshot: {}", err) + } + ProcessClientMessageError::NodeIdentitySnapshot(err) => { + write!( + f, + "process client message error: node identity snapshot: {}", + err + ) + } + ProcessClientMessageError::HistogramSnapshot(err) => { + write!( + f, + "process client message error: histogram snapshot: {}", + err + ) + } + ProcessClientMessageError::VotersSnapshot(err) => { + write!(f, "process client message error: voters snapshot: {}", err) + } + } + } +} + +impl std::error::Error for ProcessClientMessageError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + ProcessClientMessageError::Connected(err) => Some(err), + ProcessClientMessageError::BlocksSnapshot(err) => Some(err), + ProcessClientMessageError::NodeIdentitySnapshot(err) => Some(err), + ProcessClientMessageError::HistogramSnapshot(err) => Some(err), + ProcessClientMessageError::VotersSnapshot(err) => Some(err), + } + } +} + +/// [process_client_message] is a 
function that processes the client message +/// and processes the message accordingly. +/// +/// The [DataState] is provided and is used only as a Read lock to distribute +/// the current state of the system to the clients upon request. +/// +/// The [ClientThreadState] is provided as it needs to be updated with new +/// subscriptions / new connections depending on the incoming +/// [InternalClientMessage] +pub async fn process_client_message( + message: InternalClientMessage, + data_state: Arc>, + client_thread_state: Arc>>, +) -> Result<(), ProcessClientMessageError> +where + K: Sink + Clone + Unpin, +{ + match message { + InternalClientMessage::Connected(sender) => { + handle_client_message_connected(sender, client_thread_state).await?; + Ok(()) + } + + InternalClientMessage::Disconnected(client_id) => { + handle_client_message_disconnected(client_id, client_thread_state).await; + Ok(()) + } + + InternalClientMessage::Request(client_id, ClientMessage::SubscribeLatestBlock) => { + handle_client_message_subscribe_latest_block(client_id, client_thread_state).await; + Ok(()) + } + + InternalClientMessage::Request(client_id, ClientMessage::SubscribeNodeIdentity) => { + handle_client_message_subscribe_node_identity(client_id, client_thread_state).await; + Ok(()) + } + + InternalClientMessage::Request(client_id, ClientMessage::SubscribeVoters) => { + handle_client_message_subscribe_voters(client_id, client_thread_state).await; + Ok(()) + } + + InternalClientMessage::Request(client_id, ClientMessage::RequestBlocksSnapshot) => { + handle_client_message_request_blocks_snapshot( + client_id, + data_state, + client_thread_state, + ) + .await?; + Ok(()) + } + + InternalClientMessage::Request(client_id, ClientMessage::RequestNodeIdentitySnapshot) => { + handle_client_message_request_node_identity_snapshot( + client_id, + data_state, + client_thread_state, + ) + .await?; + Ok(()) + } + + InternalClientMessage::Request(client_id, ClientMessage::RequestHistogramSnapshot) => { + handle_client_message_request_histogram_snapshot( + client_id, + data_state, + client_thread_state, + ) + .await?; + Ok(()) + } + + InternalClientMessage::Request(client_id, ClientMessage::RequestVotersSnapshot) => { + handle_client_message_request_voters_snapshot( + client_id, + data_state, + client_thread_state, + ) + .await?; + Ok(()) + } + } +} + +/// [clone_block_detail] is a utility function that clones a [BlockDetail] +/// instance. +pub fn clone_block_detail(input: &BlockDetail) -> BlockDetail { + BlockDetail { + hash: input.hash, + proposer_id: input.proposer_id.clone(), + height: input.height, + size: input.size, + time: input.time, + num_transactions: input.num_transactions, + fee_recipient: input.fee_recipient.clone(), + block_reward: input.block_reward.clone(), + } +} + +/// [drop_failed_client_sends] is a function that will drop all of the failed +/// client sends from the client thread state. +async fn drop_failed_client_sends( + client_thread_state: Arc>>, + failed_client_sends: Vec, +) { + // Let's acquire our write lock + let mut client_thread_state_write_lock_guard = client_thread_state.write().await; + + // We want to drop all of the failed clients. 
+    // There's an optimization to be had here
+    for client_id in failed_client_sends {
+        drop_client_client_thread_state_write_guard(
+            &client_id,
+            &mut client_thread_state_write_lock_guard,
+        );
+    }
+}
+
+/// [handle_received_block_detail] is a function that processes received Block
+/// details and will attempt to distribute the message to all of the clients
+/// that are subscribed to the latest block stream.
+async fn handle_received_block_detail<K>(
+    client_thread_state: Arc<RwLock<ClientThreadState<K>>>,
+    block_detail: BlockDetail<SeqTypes>,
+) where
+    K: Sink<ServerMessage, Error = SendError> + Clone + Unpin,
+{
+    let client_thread_state_read_lock_guard = client_thread_state.read().await;
+
+    // These are the clients who are subscribed to the latest blocks, that
+    // have an active ClientState within the system.
+    let latest_block_subscribers = client_thread_state_read_lock_guard
+        .subscribed_latest_block
+        .iter()
+        .map(|client_id| {
+            (
+                client_id,
+                client_thread_state_read_lock_guard.clients.get(client_id),
+            )
+        })
+        .filter(|(_, client)| client.is_some());
+
+    let arc_block_detail = Arc::new(block_detail);
+    // We collect the results of sending the latest block to the clients.
+    let client_send_result_future = latest_block_subscribers.map(|(client_id, client)| {
+        let arc_block_detail = arc_block_detail.clone();
+        async move {
+            // The `filter` above guarantees that this is a `Some` now.
+            let client = client.unwrap();
+            let mut sender = client.sender.clone();
+            let send_result = sender
+                .send(ServerMessage::LatestBlock(arc_block_detail))
+                .await;
+
+            (client_id, send_result)
+        }
+    });
+
+    let client_send_results = futures::future::join_all(client_send_result_future).await;
+
+    // These are the clients we failed to send the message to. We copy these
+    // here so we can drop our read lock.
+    let failed_client_sends = client_send_results
+        .into_iter()
+        .filter(|(_, send_result)| send_result.is_err())
+        .map(|(client_id, _)| *client_id)
+        .collect::<Vec<ClientId>>();
+
+    // Explicitly Drop the read lock.
+    drop(client_thread_state_read_lock_guard);
+
+    if failed_client_sends.is_empty() {
+        return;
+    }
+
+    drop_failed_client_sends(client_thread_state, failed_client_sends).await;
+}
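+// The distribution handlers in this module all follow the same fan-out shape:
+// snapshot the subscriber set under a read lock, send concurrently, then
+// prune any client whose channel has gone away. A condensed sketch of that
+// shape (illustrative only; `subscribers` and `msg` are placeholders):
+//
+//     let results = futures::future::join_all(subscribers.map(|(id, mut tx)| async move {
+//         (id, tx.send(msg.clone()).await)
+//     }))
+//     .await;
+//     let failed: Vec<ClientId> = results
+//         .into_iter()
+//         .filter_map(|(id, r)| r.err().map(|_| id))
+//         .collect();
+//     // Only now take the write lock, to remove the failed clients.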
+/// [handle_received_node_identity] is a function that processes received
+/// NodeIdentity and will attempt to distribute the message to all of the
+/// clients that are subscribed to the node identity stream.
+async fn handle_received_node_identity<K>(
+    client_thread_state: Arc<RwLock<ClientThreadState<K>>>,
+    node_identity: NodeIdentity,
+) where
+    K: Sink<ServerMessage, Error = SendError> + Clone + Unpin,
+{
+    let client_thread_state_read_lock_guard = client_thread_state.read().await;
+
+    // These are the clients who are subscribed to the node identities, that
+    // have an active ClientState within the system.
+    let node_identity_subscribers = client_thread_state_read_lock_guard
+        .subscribed_node_identity
+        .iter()
+        .map(|client_id| {
+            (
+                client_id,
+                client_thread_state_read_lock_guard.clients.get(client_id),
+            )
+        })
+        .filter(|(_, client)| client.is_some());
+
+    let arc_node_identity = Arc::new(node_identity);
+    // We collect the results of sending the latest node identity to the clients.
+    let client_send_result_future = node_identity_subscribers.map(|(client_id, client)| {
+        let arc_node_identity = arc_node_identity.clone();
+        async move {
+            // The `filter` above guarantees that this is a `Some` now.
+            let client = client.unwrap();
+            let mut sender = client.sender.clone();
+            let send_result = sender
+                .send(ServerMessage::LatestNodeIdentity(arc_node_identity.clone()))
+                .await;
+
+            (client_id, send_result)
+        }
+    });
+
+    let client_send_results = futures::future::join_all(client_send_result_future).await;
+
+    // These are the clients we failed to send the message to. We copy these
+    // here so we can drop our read lock.
+    let failed_client_sends = client_send_results
+        .into_iter()
+        .filter(|(_, send_result)| send_result.is_err())
+        .map(|(client_id, _)| *client_id)
+        .collect::<Vec<ClientId>>();
+
+    // Explicitly Drop the read lock.
+    drop(client_thread_state_read_lock_guard);
+
+    if failed_client_sends.is_empty() {
+        return;
+    }
+
+    drop_failed_client_sends(client_thread_state, failed_client_sends).await;
+}
+
+/// [handle_received_voters] is a function that processes received voters and
+/// will attempt to distribute the message to all of the clients that are
+/// subscribed to the voters stream.
+async fn handle_received_voters<K>(
+    client_thread_state: Arc<RwLock<ClientThreadState<K>>>,
+    voters: BitVec<u16>,
+) where
+    K: Sink<ServerMessage, Error = SendError> + Clone + Unpin,
+{
+    let client_thread_state_read_lock_guard = client_thread_state.read().await;
+
+    // These are the clients who are subscribed to the voters, that
+    // have an active ClientState within the system.
+    let voters_subscribers = client_thread_state_read_lock_guard
+        .subscribed_voters
+        .iter()
+        .map(|client_id| {
+            (
+                client_id,
+                client_thread_state_read_lock_guard.clients.get(client_id),
+            )
+        })
+        .filter(|(_, client)| client.is_some());
+
+    // We collect the results of sending the latest voters to the clients.
+    let client_send_result_future = voters_subscribers.map(|(client_id, client)| {
+        let voters = voters.clone();
+        async move {
+            // The `filter` above guarantees that this is a `Some` now.
+            let client = client.unwrap();
+            let mut sender = client.sender.clone();
+            let send_result = sender.send(ServerMessage::LatestVoters(voters)).await;
+
+            (client_id, send_result)
+        }
+    });
+
+    let client_send_results = futures::future::join_all(client_send_result_future).await;
+
+    // These are the clients we failed to send the message to. We copy these
+    // here so we can drop our read lock.
+    let failed_client_sends = client_send_results
+        .into_iter()
+        .filter(|(_, send_result)| send_result.is_err())
+        .map(|(client_id, _)| *client_id)
+        .collect::<Vec<ClientId>>();
+
+    // Explicitly Drop the read lock.
+    drop(client_thread_state_read_lock_guard);
+
+    if failed_client_sends.is_empty() {
+        return;
+    }
+
+    drop_failed_client_sends(client_thread_state, failed_client_sends).await;
+}
+
+/// [InternalClientMessageProcessingTask] represents an async task for
+/// processing [InternalClientMessage]s, making the appropriate updates to
+/// the [ClientThreadState] and [DataState].
+pub struct InternalClientMessageProcessingTask {
+    pub task_handle: Option<JoinHandle<()>>,
+}
+
+impl InternalClientMessageProcessingTask {
+    /// [new] creates a new [InternalClientMessageProcessingTask] with the
+    /// given internal_client_message_receiver, data_state, and
+    /// client_thread_state.
+    ///
+    /// Calling this function will start an async task that will start
+    /// processing. The handle for the async task is stored within the
+    /// returned state.
+ pub fn new( + internal_client_message_receiver: S, + data_state: Arc>, + client_thread_state: Arc>>, + ) -> Self + where + S: Stream> + Send + Sync + Unpin + 'static, + K: Sink + Clone + Send + Sync + Unpin + 'static, + { + let task_handle = async_std::task::spawn(Self::process_internal_client_message_stream( + internal_client_message_receiver, + data_state.clone(), + client_thread_state.clone(), + )); + + Self { + task_handle: Some(task_handle), + } + } + + /// [process_internal_client_message_stream] is a function that processes the + /// client handling stream. This stream is responsible for managing the state + /// of the connected clients, and their subscriptions. + async fn process_internal_client_message_stream( + mut stream: S, + data_state: Arc>, + client_thread_state: Arc>>, + ) where + S: Stream> + Unpin, + K: Sink + Clone + Unpin, + { + loop { + let message_result = stream.next().await; + let message = if let Some(message) = message_result { + message + } else { + tracing::error!("internal client message handler closed."); + panic!("InternalClientMessageProcessingTask stream closed, unable to process new requests from clients."); + }; + + if let Err(err) = + process_client_message(message, data_state.clone(), client_thread_state.clone()) + .await + { + tracing::info!( + "internal client message processing encountered an error: {}", + err, + ); + return; + } + } + } +} + +/// [drop] implementation for [InternalClientMessageProcessingTask] that will +/// cancel the task if it is still running. +impl Drop for InternalClientMessageProcessingTask { + fn drop(&mut self) { + let task_handle = self.task_handle.take(); + if let Some(task_handle) = task_handle { + async_std::task::block_on(task_handle.cancel()); + } + } +} + +/// [ProcessDistributeBlockDetailHandlingTask] represents an async task for +/// processing the incoming [BlockDetail] and distributing them to all +/// subscribed clients. +pub struct ProcessDistributeBlockDetailHandlingTask { + pub task_handle: Option>, +} + +impl ProcessDistributeBlockDetailHandlingTask { + /// [new] creates a new [ProcessDistributeBlockDetailHandlingTask] with the + /// given client_thread_state and block_detail_receiver. + /// + /// Calling this function will start an async task that will start + /// processing. The handle for the async task is stored within the + /// returned state. + pub fn new( + client_thread_state: Arc>>, + block_detail_receiver: S, + ) -> Self + where + S: Stream> + Send + Sync + Unpin + 'static, + K: Sink + Clone + Send + Sync + Unpin + 'static, + { + let task_handle = + async_std::task::spawn(Self::process_distribute_block_detail_handling_stream( + client_thread_state.clone(), + block_detail_receiver, + )); + + Self { + task_handle: Some(task_handle), + } + } + + /// [process_distribute_block_detail_handling_stream] is a function that + /// processes the the [Stream] of incoming [BlockDetail] and distributes them + /// to all subscribed clients. + async fn process_distribute_block_detail_handling_stream( + client_thread_state: Arc>>, + mut stream: S, + ) where + S: Stream> + Unpin, + K: Sink + Clone + Unpin, + { + loop { + let block_detail_result = stream.next().await; + + let block_detail = if let Some(block_detail) = block_detail_result { + block_detail + } else { + tracing::error!( + "block detail stream closed. 
shutting down client handling stream.", + ); + return; + }; + + handle_received_block_detail(client_thread_state.clone(), block_detail).await + } + } +} + +/// [drop] implementation for [ProcessDistributeBlockDetailHandlingTask] that will +/// cancel the task if it is still running. +impl Drop for ProcessDistributeBlockDetailHandlingTask { + fn drop(&mut self) { + let task_handle = self.task_handle.take(); + if let Some(task_handle) = task_handle { + async_std::task::block_on(task_handle.cancel()); + } + } +} + +/// [ProcessDistributeNodeIdentityHandlingTask] represents an async task for +/// processing the incoming [NodeIdentity] and distributing them to all +/// subscribed clients. +pub struct ProcessDistributeNodeIdentityHandlingTask { + pub task_handle: Option>, +} + +impl ProcessDistributeNodeIdentityHandlingTask { + /// [new] creates a new [ProcessDistributeNodeIdentityHandlingTask] with the + /// given client_thread_state and node_identity_receiver. + /// + /// Calling this function will start an async task that will start + /// processing. The handle for the async task is stored within the + /// returned state. + pub fn new( + client_thread_state: Arc>>, + node_identity_receiver: S, + ) -> Self + where + S: Stream + Send + Sync + Unpin + 'static, + K: Sink + Clone + Send + Sync + Unpin + 'static, + { + let task_handle = + async_std::task::spawn(Self::process_distribute_node_identity_handling_stream( + client_thread_state.clone(), + node_identity_receiver, + )); + + Self { + task_handle: Some(task_handle), + } + } + + /// [process_distribute_node_identity_handling_stream] is a function that + /// processes the the [Stream] of incoming [NodeIdentity] and distributes them + /// to all subscribed clients. + async fn process_distribute_node_identity_handling_stream( + client_thread_state: Arc>>, + mut stream: S, + ) where + S: Stream + Unpin, + K: Sink + Clone + Unpin, + { + loop { + let node_identity_result = stream.next().await; + + let node_identity = if let Some(node_identity) = node_identity_result { + node_identity + } else { + tracing::error!( + "node identity stream closed. shutting down client handling stream.", + ); + return; + }; + + handle_received_node_identity(client_thread_state.clone(), node_identity).await + } + } +} + +/// [drop] implementation for [ProcessDistributeNodeIdentityHandlingTask] that +/// will cancel the task if it is still running. +impl Drop for ProcessDistributeNodeIdentityHandlingTask { + fn drop(&mut self) { + let task_handle = self.task_handle.take(); + if let Some(task_handle) = task_handle { + async_std::task::block_on(task_handle.cancel()); + } + } +} + +/// [ProcessDistributeVotersHandlingTask] represents an async task for +/// processing the incoming [BitVec] and distributing them to all +/// subscribed clients. +pub struct ProcessDistributeVotersHandlingTask { + pub task_handle: Option>, +} + +impl ProcessDistributeVotersHandlingTask { + /// [new] creates a new [ProcessDistributeVotersHandlingTask] with the + /// given client_thread_state and voters_receiver. + /// + /// Calling this function will start an async task that will start + /// processing. The handle for the async task is stored within the + /// returned state. 
+ pub fn new( + client_thread_state: Arc>>, + voters_receiver: S, + ) -> Self + where + S: Stream> + Send + Sync + Unpin + 'static, + K: Sink + Clone + Send + Sync + Unpin + 'static, + { + let task_handle = async_std::task::spawn(Self::process_distribute_voters_handling_stream( + client_thread_state.clone(), + voters_receiver, + )); + + Self { + task_handle: Some(task_handle), + } + } + + /// [process_distribute_voters_handling_stream] is a function that processes + /// the the [Stream] of incoming [BitVec] and distributes them to all + /// subscribed clients. + async fn process_distribute_voters_handling_stream( + client_thread_state: Arc>>, + mut stream: S, + ) where + S: Stream> + Unpin, + K: Sink + Clone + Unpin, + { + loop { + let voters_result = stream.next().await; + + let voters = if let Some(voters) = voters_result { + voters + } else { + tracing::error!("voters stream closed. shutting down client handling stream.",); + return; + }; + + handle_received_voters(client_thread_state.clone(), voters).await + } + } +} + +/// [drop] implementation for [ProcessDistributeVotersHandlingTask] that will +/// cancel the task if it is still running. +impl Drop for ProcessDistributeVotersHandlingTask { + fn drop(&mut self) { + let task_handle = self.task_handle.take(); + if let Some(task_handle) = task_handle { + async_std::task::block_on(task_handle.cancel()); + } + } +} + +#[cfg(test)] +pub mod tests { + use super::{ClientThreadState, InternalClientMessageProcessingTask}; + use crate::service::{ + client_id::ClientId, + client_message::{ClientMessage, InternalClientMessage}, + client_state::{ + ProcessDistributeBlockDetailHandlingTask, ProcessDistributeNodeIdentityHandlingTask, + ProcessDistributeVotersHandlingTask, + }, + data_state::{ + create_block_detail_from_leaf, DataState, LocationDetails, NodeIdentity, + ProcessLeafStreamTask, + }, + server_message::ServerMessage, + }; + use async_std::{prelude::FutureExt, sync::RwLock}; + use bitvec::vec::BitVec; + use espresso_types::{Leaf, NodeState, ValidatedState}; + use futures::{ + channel::mpsc::{self, Sender}, + SinkExt, StreamExt, + }; + use hotshot_types::{signature_key::BLSPubKey, traits::signature_key::SignatureKey}; + use std::{sync::Arc, time::Duration}; + + pub fn create_test_client_thread_state() -> ClientThreadState> { + ClientThreadState { + clients: Default::default(), + subscribed_latest_block: Default::default(), + subscribed_node_identity: Default::default(), + subscribed_voters: Default::default(), + connection_id_counter: ClientId::from_count(1), + } + } + + pub fn create_test_data_state() -> (NodeIdentity, NodeIdentity, NodeIdentity, DataState) { + let node_1 = { + let (pub_key, _) = BLSPubKey::generated_from_seed_indexed([0; 32], 0); + NodeIdentity::new( + pub_key, + Some("a".to_string()), + Some("http://localhost/".parse().unwrap()), + Some("company".to_string()), + Some("https://example.com/".parse().unwrap()), + Some(LocationDetails::new( + Some((0.0, 0.0)), + Some("US".to_string()), + )), + Some("Windows 11".to_string()), + Some("espresso".to_string()), + Some("residential".to_string()), + ) + }; + + let node_2 = { + let (pub_key, _) = BLSPubKey::generated_from_seed_indexed([0; 32], 1); + NodeIdentity::new( + pub_key, + Some("b".to_string()), + Some("http://localhost/".parse().unwrap()), + Some("company".to_string()), + Some("https://example.com/".parse().unwrap()), + Some(LocationDetails::new( + Some((0.0, 0.0)), + Some("US".to_string()), + )), + Some("Windows 11".to_string()), + Some("espresso".to_string()), + 
Some("residential".to_string()), + ) + }; + + let node_3 = { + let (pub_key, _) = BLSPubKey::generated_from_seed_indexed([0; 32], 2); + NodeIdentity::new( + pub_key, + Some("b".to_string()), + Some("http://localhost/".parse().unwrap()), + Some("company".to_string()), + Some("https://example.com/".parse().unwrap()), + Some(LocationDetails::new( + Some((0.0, 0.0)), + Some("US".to_string()), + )), + Some("Windows 11".to_string()), + Some("espresso".to_string()), + Some("residential".to_string()), + ) + }; + + let mut data_state: DataState = Default::default(); + data_state.add_node_identity(node_1.clone()); + data_state.add_node_identity(node_2.clone()); + data_state.add_node_identity(node_3.clone()); + + (node_1, node_2, node_3, data_state) + } + + #[async_std::test] + async fn test_client_handling_stream_task_shutdown() { + let (_, _, _, data_state) = create_test_data_state(); + let client_thread_state = Arc::new(RwLock::new(create_test_client_thread_state())); + let data_state = Arc::new(RwLock::new(data_state)); + + let (_internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); + let _process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + internal_client_message_receiver, + data_state, + client_thread_state, + ); + } + + #[async_std::test] + async fn test_process_client_handling_stream_request_latest_voters_snapshot() { + let (_, _, _, mut data_state) = create_test_data_state(); + let client_thread_state = Arc::new(RwLock::new(create_test_client_thread_state())); + let voters_1 = BitVec::from_vec(vec![0x55]); + let voters_2 = BitVec::from_vec(vec![0xAA]); + data_state.add_latest_voters(voters_1.clone()); + data_state.add_latest_voters(voters_2.clone()); + + let data_state = Arc::new(RwLock::new(data_state)); + + let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); + let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); + let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); + let _process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + internal_client_message_receiver, + data_state, + client_thread_state, + ); + + // Send a Connected Message to the server + let mut internal_client_message_sender_1 = internal_client_message_sender.clone(); + assert_eq!( + internal_client_message_sender_1 + .send(InternalClientMessage::Connected(server_message_sender_1)) + .await, + Ok(()) + ); + + assert_eq!( + server_message_receiver_1.next().await, + Some(ServerMessage::YouAre(ClientId::from_count(2))), + ); + + let client_1_id = ClientId::from_count(2); + + let mut internal_client_message_sender_2 = internal_client_message_sender; + assert_eq!( + internal_client_message_sender_2 + .send(InternalClientMessage::Connected(server_message_sender_2)) + .await, + Ok(()) + ); + + assert_eq!( + server_message_receiver_2.next().await, + Some(ServerMessage::YouAre(ClientId::from_count(3))), + ); + + assert_eq!( + internal_client_message_sender_1 + .send(InternalClientMessage::Request( + client_1_id, + ClientMessage::RequestVotersSnapshot + )) + .await, + Ok(()), + ); + + assert_eq!( + server_message_receiver_1.next().await, + Some(ServerMessage::VotersSnapshot(Arc::new(vec![ + voters_1, voters_2 + ]))), + ); + } + + #[async_std::test] + #[cfg(feature = "testing")] + async fn test_process_client_handling_stream_request_latest_blocks_snapshot() { + use super::clone_block_detail; + use 
crate::service::data_state::create_block_detail_from_leaf; + + let (_, _, _, mut data_state) = create_test_data_state(); + let client_thread_state = Arc::new(RwLock::new(create_test_client_thread_state())); + let leaf_1 = Leaf::genesis(&ValidatedState::default(), &NodeState::mock()).await; + let block_1 = create_block_detail_from_leaf(&leaf_1); + data_state.add_latest_block(clone_block_detail(&block_1)); + + let data_state = Arc::new(RwLock::new(data_state)); + + let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); + let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); + let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); + let mut process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + internal_client_message_receiver, + data_state, + client_thread_state, + ); + + // Send a Connected Message to the server + let mut internal_client_message_sender_1 = internal_client_message_sender.clone(); + assert_eq!( + internal_client_message_sender_1 + .send(InternalClientMessage::Connected(server_message_sender_1)) + .await, + Ok(()) + ); + + assert_eq!( + server_message_receiver_1.next().await, + Some(ServerMessage::YouAre(ClientId::from_count(2))), + ); + + let client_1_id = ClientId::from_count(2); + + let mut internal_client_message_sender_2 = internal_client_message_sender; + assert_eq!( + internal_client_message_sender_2 + .send(InternalClientMessage::Connected(server_message_sender_2)) + .await, + Ok(()), + ); + + assert_eq!( + server_message_receiver_2.next().await, + Some(ServerMessage::YouAre(ClientId::from_count(3))), + ); + + assert_eq!( + internal_client_message_sender_1 + .send(InternalClientMessage::Request( + client_1_id, + ClientMessage::RequestBlocksSnapshot + )) + .await, + Ok(()), + ); + + assert_eq!( + server_message_receiver_1.next().await, + Some(ServerMessage::BlocksSnapshot(Arc::new(vec![block_1]))), + ); + + if let Some(process_internal_client_message_handle) = + process_internal_client_message_handle.task_handle.take() + { + assert_eq!(process_internal_client_message_handle.cancel().await, None); + } + } + + #[async_std::test] + async fn test_process_client_handling_stream_request_node_identity_snapshot() { + let (node_1, node_2, node_3, data_state) = create_test_data_state(); + let client_thread_state = Arc::new(RwLock::new(create_test_client_thread_state())); + let data_state = Arc::new(RwLock::new(data_state)); + + let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); + let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); + let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); + let mut process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + internal_client_message_receiver, + data_state, + client_thread_state, + ); + + // Send a Connected Message to the server + let mut internal_client_message_sender_1 = internal_client_message_sender.clone(); + assert_eq!( + internal_client_message_sender_1 + .send(InternalClientMessage::Connected(server_message_sender_1)) + .await, + Ok(()) + ); + + assert_eq!( + server_message_receiver_1.next().await, + Some(ServerMessage::YouAre(ClientId::from_count(2))), + ); + + let client_1_id = ClientId::from_count(2); + + // Send another Connected Message to the server + let mut internal_client_message_sender_2 = internal_client_message_sender; + assert_eq!( + internal_client_message_sender_2 + 
.send(InternalClientMessage::Connected(server_message_sender_2)) + .await, + Ok(()) + ); + + assert_eq!( + server_message_receiver_2.next().await, + Some(ServerMessage::YouAre(ClientId::from_count(3))), + ); + + assert_eq!( + internal_client_message_sender_1 + .send(InternalClientMessage::Request( + client_1_id, + ClientMessage::RequestNodeIdentitySnapshot + )) + .await, + Ok(()), + ); + + assert_eq!( + server_message_receiver_1.next().await, + Some(ServerMessage::NodeIdentitySnapshot(Arc::new(vec![ + node_1.clone(), + node_2.clone(), + node_3.clone() + ]))), + ); + + if let Some(process_internal_client_message_handle) = + process_internal_client_message_handle.task_handle.take() + { + assert_eq!(process_internal_client_message_handle.cancel().await, None); + } + } + + #[async_std::test] + async fn test_process_client_handling_stream_subscribe_latest_block() { + let (_, _, _, data_state) = create_test_data_state(); + let client_thread_state = Arc::new(RwLock::new(create_test_client_thread_state())); + let data_state = Arc::new(RwLock::new(data_state)); + + let (mut leaf_sender, leaf_receiver) = mpsc::channel(1); + let (block_detail_sender, block_detail_receiver) = mpsc::channel(1); + let (voters_sender, voters_receiver) = mpsc::channel(1); + let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); + let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); + let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); + let (server_message_sender_3, mut server_message_receiver_3) = mpsc::channel(1); + let mut process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + internal_client_message_receiver, + data_state.clone(), + client_thread_state.clone(), + ); + + let mut process_distribute_block_detail_handle = + ProcessDistributeBlockDetailHandlingTask::new( + client_thread_state.clone(), + block_detail_receiver, + ); + + let mut process_distribute_voters_handle = + ProcessDistributeVotersHandlingTask::new(client_thread_state, voters_receiver); + + let mut process_leaf_stream_handle = ProcessLeafStreamTask::new( + leaf_receiver, + data_state, + block_detail_sender, + voters_sender, + ); + + // Send a Connected Message to the server + let mut internal_client_message_sender_1 = internal_client_message_sender.clone(); + assert_eq!( + internal_client_message_sender_1 + .send(InternalClientMessage::Connected(server_message_sender_1)) + .await, + Ok(()) + ); + + assert_eq!( + server_message_receiver_1.next().await, + Some(ServerMessage::YouAre(ClientId::from_count(2))), + ); + + let client_1_id = ClientId::from_count(2); + let client_2_id = ClientId::from_count(3); + + // Send another Connected Message to the server + let mut internal_client_message_sender_2 = internal_client_message_sender.clone(); + assert_eq!( + internal_client_message_sender_2 + .send(InternalClientMessage::Connected(server_message_sender_2)) + .await, + Ok(()) + ); + + assert_eq!( + server_message_receiver_2.next().await, + Some(ServerMessage::YouAre(ClientId::from_count(3))), + ); + + // Send another Connected Message to the server + let mut internal_client_message_sender_3 = internal_client_message_sender; + assert_eq!( + internal_client_message_sender_3 + .send(InternalClientMessage::Connected(server_message_sender_3)) + .await, + Ok(()) + ); + + assert_eq!( + server_message_receiver_3.next().await, + Some(ServerMessage::YouAre(ClientId::from_count(4))), + ); + + assert_eq!( + internal_client_message_sender_1 + 
.send(InternalClientMessage::Request( + client_1_id, + ClientMessage::SubscribeLatestBlock + )) + .await, + Ok(()), + ); + + assert_eq!( + internal_client_message_sender_1 + .send(InternalClientMessage::Request( + client_2_id, + ClientMessage::SubscribeLatestBlock + )) + .await, + Ok(()), + ); + + // No response expected from the client messages at the moment. + + // send a new leaf + let leaf = Leaf::genesis(&ValidatedState::default(), &NodeState::mock()).await; + let expected_block = create_block_detail_from_leaf(&leaf); + let arc_expected_block = Arc::new(expected_block); + + assert_eq!(leaf_sender.send(leaf).await, Ok(())); + + // We should receive the Block Detail on each subscribed client + assert_eq!( + server_message_receiver_1.next().await, + Some(ServerMessage::LatestBlock(arc_expected_block.clone())) + ); + assert_eq!( + server_message_receiver_2.next().await, + Some(ServerMessage::LatestBlock(arc_expected_block.clone())) + ); + + if server_message_receiver_3 + .next() + .timeout(Duration::from_millis(10)) + .await + .is_ok() + { + panic!("receiver 3 should not have received the latest block."); + } + + if let Some(process_internal_client_message_handle) = + process_internal_client_message_handle.task_handle.take() + { + assert_eq!(process_internal_client_message_handle.cancel().await, None); + } + if let Some(process_distribute_block_detail_handle) = + process_distribute_block_detail_handle.task_handle.take() + { + assert_eq!(process_distribute_block_detail_handle.cancel().await, None); + } + if let Some(process_distribute_voters_handle) = + process_distribute_voters_handle.task_handle.take() + { + assert_eq!(process_distribute_voters_handle.cancel().await, None); + } + if let Some(process_leaf_stream_handle) = process_leaf_stream_handle.task_handle.take() { + assert_eq!(process_leaf_stream_handle.cancel().await, None); + } + } + + #[async_std::test] + async fn test_process_client_handling_stream_subscribe_node_identity() { + let (node_1, _, _, data_state) = create_test_data_state(); + let client_thread_state = Arc::new(RwLock::new(create_test_client_thread_state())); + let data_state = Arc::new(RwLock::new(data_state)); + + let (mut node_identity_sender, node_identity_receiver) = mpsc::channel(1); + let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); + let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); + let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); + let (server_message_sender_3, mut server_message_receiver_3) = mpsc::channel(1); + let mut process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + internal_client_message_receiver, + data_state.clone(), + client_thread_state.clone(), + ); + + let mut process_distribute_node_identity_handle = + ProcessDistributeNodeIdentityHandlingTask::new( + client_thread_state, + node_identity_receiver, + ); + + // Send a Connected Message to the server + let mut internal_client_message_sender_1 = internal_client_message_sender.clone(); + assert_eq!( + internal_client_message_sender_1 + .send(InternalClientMessage::Connected(server_message_sender_1)) + .await, + Ok(()) + ); + + assert_eq!( + server_message_receiver_1.next().await, + Some(ServerMessage::YouAre(ClientId::from_count(2))), + ); + + let client_1_id = ClientId::from_count(2); + let client_2_id = ClientId::from_count(3); + + // Send another Connected Message to the server + let mut internal_client_message_sender_2 = 
internal_client_message_sender.clone(); + assert_eq!( + internal_client_message_sender_2 + .send(InternalClientMessage::Connected(server_message_sender_2)) + .await, + Ok(()) + ); + + assert_eq!( + server_message_receiver_2.next().await, + Some(ServerMessage::YouAre(ClientId::from_count(3))), + ); + + // Send another Connected Message to the server + let mut internal_client_message_sender_3 = internal_client_message_sender.clone(); + assert_eq!( + internal_client_message_sender_3 + .send(InternalClientMessage::Connected(server_message_sender_3)) + .await, + Ok(()) + ); + + assert_eq!( + server_message_receiver_3.next().await, + Some(ServerMessage::YouAre(ClientId::from_count(4))), + ); + + assert_eq!( + internal_client_message_sender_1 + .send(InternalClientMessage::Request( + client_1_id, + ClientMessage::SubscribeNodeIdentity + )) + .await, + Ok(()), + ); + + assert_eq!( + internal_client_message_sender_1 + .send(InternalClientMessage::Request( + client_2_id, + ClientMessage::SubscribeNodeIdentity + )) + .await, + Ok(()), + ); + + // No response expected from the client messages at the moment. + + // send a new Node Identity + let node_identity = node_1; + assert_eq!( + node_identity_sender.send(node_identity.clone()).await, + Ok(()) + ); + + let arc_node_identity = Arc::new(node_identity.clone()); + + // We should receive the Block Detail on each subscribed client + assert_eq!( + server_message_receiver_1.next().await, + Some(ServerMessage::LatestNodeIdentity(arc_node_identity.clone())) + ); + assert_eq!( + server_message_receiver_2.next().await, + Some(ServerMessage::LatestNodeIdentity(arc_node_identity.clone())) + ); + + if let Some(process_internal_client_message_handle) = + process_internal_client_message_handle.task_handle.take() + { + assert_eq!(process_internal_client_message_handle.cancel().await, None); + } + + if let Some(process_distribute_node_identity_handle) = + process_distribute_node_identity_handle.task_handle.take() + { + assert_eq!(process_distribute_node_identity_handle.cancel().await, None); + } + } + + #[async_std::test] + async fn test_process_client_handling_stream_subscribe_voters() { + let (_, _, _, data_state) = create_test_data_state(); + let client_thread_state = Arc::new(RwLock::new(create_test_client_thread_state())); + let data_state = Arc::new(RwLock::new(data_state)); + + let (mut voters_sender, voters_receiver) = mpsc::channel(1); + let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); + let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); + let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); + let (server_message_sender_3, mut server_message_receiver_3) = mpsc::channel(1); + let mut process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + internal_client_message_receiver, + data_state.clone(), + client_thread_state.clone(), + ); + + let mut process_distribute_voters_handle = + ProcessDistributeVotersHandlingTask::new(client_thread_state, voters_receiver); + + // Send a Connected Message to the server + let mut internal_client_message_sender_1 = internal_client_message_sender.clone(); + assert_eq!( + internal_client_message_sender_1 + .send(InternalClientMessage::Connected(server_message_sender_1)) + .await, + Ok(()) + ); + + assert_eq!( + server_message_receiver_1.next().await, + Some(ServerMessage::YouAre(ClientId::from_count(2))), + ); + + let client_1_id = ClientId::from_count(2); + let client_2_id = ClientId::from_count(3); + + 
// Send another Connected Message to the server
+        let mut internal_client_message_sender_2 = internal_client_message_sender.clone();
+        assert_eq!(
+            internal_client_message_sender_2
+                .send(InternalClientMessage::Connected(server_message_sender_2))
+                .await,
+            Ok(())
+        );
+
+        assert_eq!(
+            server_message_receiver_2.next().await,
+            Some(ServerMessage::YouAre(ClientId::from_count(3))),
+        );
+
+        // Send another Connected Message to the server
+        let mut internal_client_message_sender_3 = internal_client_message_sender;
+        assert_eq!(
+            internal_client_message_sender_3
+                .send(InternalClientMessage::Connected(server_message_sender_3))
+                .await,
+            Ok(())
+        );
+
+        assert_eq!(
+            server_message_receiver_3.next().await,
+            Some(ServerMessage::YouAre(ClientId::from_count(4))),
+        );
+
+        assert_eq!(
+            internal_client_message_sender_1
+                .send(InternalClientMessage::Request(
+                    client_1_id,
+                    ClientMessage::SubscribeVoters
+                ))
+                .await,
+            Ok(()),
+        );
+
+        assert_eq!(
+            internal_client_message_sender_1
+                .send(InternalClientMessage::Request(
+                    client_2_id,
+                    ClientMessage::SubscribeVoters
+                ))
+                .await,
+            Ok(()),
+        );
+
+        // No response expected from the client messages at the moment.
+
+        // send a new set of voters
+        let voters = BitVec::from_vec(vec![0x55]);
+        assert_eq!(voters_sender.send(voters.clone()).await, Ok(()));
+
+        // We should receive the latest voters on each subscribed client
+        assert_eq!(
+            server_message_receiver_1.next().await,
+            Some(ServerMessage::LatestVoters(voters.clone()))
+        );
+        assert_eq!(
+            server_message_receiver_2.next().await,
+            Some(ServerMessage::LatestVoters(voters.clone()))
+        );
+
+        if let Some(process_internal_client_message_handle) =
+            process_internal_client_message_handle.task_handle.take()
+        {
+            assert_eq!(process_internal_client_message_handle.cancel().await, None);
+        }
+        if let Some(process_distribute_voters_handle) =
+            process_distribute_voters_handle.task_handle.take()
+        {
+            assert_eq!(process_distribute_voters_handle.cancel().await, None);
+        }
+    }
+
+    // The following tests codify assumptions being made about the Sink and
+    // Receiver implementations provided by the futures library. The purpose
+    // of these tests is to document those assumptions, and to ensure that
+    // they behave as expected. If they ever stop behaving as expected, then
+    // the rest of this library will need to be modified to account for that
+    // change in behavior.
+
+    /// Tests the behavior of the sender and receiver when the sender is
+    /// dropped before the receiver is polled.
+    ///
+    /// This is a separate library test to ensure that the behavior that this
+    /// library is built on top of does not introduce a change that would
+    /// make this library no longer operate correctly.
+    #[async_std::test]
+    async fn test_sender_receiver_behavior_drop_sender_before_receiver_polled_closes_receiver() {
+        let (sender, mut receiver) = mpsc::channel::<u64>(1);
+
+        drop(sender);
+
+        assert_eq!(receiver.next().await, None);
+    }
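+    // The disconnect handling in this module leans directly on the channel
+    // behavior pinned down by these tests: a failed `send` is treated as an
+    // implicit disconnect. A condensed sketch of that reliance (illustrative
+    // only; the channel type and the cleanup call site are placeholders):
+    //
+    //     let (mut sender, receiver) = mpsc::channel::<u64>(1);
+    //     drop(receiver);
+    //     if sender.send(1).await.is_err() {
+    //         // drop_client_no_lock_guard(&client_id, client_thread_state).await;
+    //     }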
+ #[async_std::test] + async fn test_sender_receiver_behavior_drop_sender_after_receiver_polled_closes_receiver() { + let (sender, mut receiver) = mpsc::channel::<u64>(1); + + let join_handle = async_std::task::spawn(async move { receiver.next().await }); + async_std::task::sleep(Duration::from_millis(100)).await; + drop(sender); + + assert_eq!(join_handle.await, None); + } + + /// Tests the behavior of the sender and receiver when the receiver is + /// dropped before anything is sent across the Sink. + /// + /// This is a separate library test to ensure that the behavior that this + /// library is built on top of does not introduce a change that would + /// make this library no longer operate correctly. + #[async_std::test] + async fn test_sender_receiver_behavior_drop_receiver_before_sender_sends() { + let (mut sender, receiver) = mpsc::channel(1); + + drop(receiver); + + assert_ne!(sender.send(1).await, Ok(())); + } + + /// Tests the behavior of the sender and receiver when the receiver is + /// dropped after the sender has sent a value. + /// + /// This is a separate library test to ensure that the behavior that this + /// library is built on top of does not introduce a change that would + /// make this library no longer operate correctly. + #[async_std::test] + async fn test_sender_receiver_behavior_drop_receiver_after_sender_sends() { + let (mut sender, mut receiver) = mpsc::channel(1); + + let join_handle = async_std::task::spawn(async move { + _ = sender.send(1).await; + async_std::task::sleep(Duration::from_millis(100)).await; + sender.send(2).await + }); + async_std::task::sleep(Duration::from_millis(50)).await; + receiver.close(); + + assert_eq!(receiver.next().await, Some(1)); + assert_eq!(receiver.next().await, None); + assert_ne!(join_handle.await, Ok(())); + } + + /// Tests to ensure that a timeout on an already ready future does not + /// cause the future's value to be dropped. + #[async_std::test] + async fn test_timeout_on_already_ready_future() { + assert_eq!( + futures::future::ready(1u64).timeout(Duration::ZERO).await, + Ok(1u64) + ); + } + + /// Tests to ensure that a timeout on an async block that is immediately + /// ready resolves with its value when polled, rather than timing out. + #[async_std::test] + async fn test_timeout_on_async_block_resolves_when_polled() { + assert_eq!(async move { 1u64 }.timeout(Duration::ZERO).await, Ok(1u64),); + + assert_eq!( + async move { 1u64 } + .timeout(Duration::from_millis(100)) + .await, + Ok(1u64), + ); + } + + /// Tests to ensure that a timeout on a future that never resolves does + /// time out as expected. + #[async_std::test] + async fn test_timeout_on_pending_future_times_out() { + assert_ne!( + async_std::future::timeout(Duration::ZERO, futures::future::pending::<u64>()).await, + Ok(1u64) + ); + } + + /// Tests to ensure that [BitVec]s are directly comparable by value, without + /// needing to worry about whether instances point to the same memory.
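For readers unfamiliar with the `bitvec` crate: the voter fixtures built from `vec![0x55]` decode to the alternating pattern `0b0101_0101`, and comparison is by value, which is exactly what the test below relies on. A self-contained sketch:

```rust
// Shows what BitVec::from_vec(vec![0x55]) expands to: with the default
// Lsb0 ordering, bit 0 is the least-significant bit of the byte, so
// 0x55 = 0b0101_0101 marks indices 0, 2, 4, 6 as set.
use bitvec::vec::BitVec;

fn main() {
    let voters: BitVec<u8> = BitVec::from_vec(vec![0x55]);

    assert_eq!(voters.len(), 8);
    assert!(voters[0] && voters[2] && voters[4] && voters[6]);
    assert!(!voters[1] && !voters[3] && !voters[5] && !voters[7]);

    // Equality is by value, not by allocation.
    assert_eq!(voters, BitVec::<u8>::from_vec(vec![0x55]));
}
```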
+ #[test] + fn test_bitvec_is_comparable() { + let bitvec_1: BitVec = BitVec::from_vec(vec![0x55]); + let bitvec_2: BitVec = BitVec::from_vec(vec![0x55]); + let bitvec_3: BitVec = BitVec::from_vec(vec![0xAA]); + + assert_eq!(bitvec_1, bitvec_2); + assert_ne!(bitvec_1, bitvec_3); + } +} diff --git a/node-metrics/src/service/data_state/location_details.rs b/node-metrics/src/service/data_state/location_details.rs new file mode 100644 index 000000000..56304c00b --- /dev/null +++ b/node-metrics/src/service/data_state/location_details.rs @@ -0,0 +1,95 @@ +use serde::{Deserialize, Serialize}; + +/// [LocationDetails] represents the details of the location of the node. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct LocationDetails { + pub coords: Option<(f64, f64)>, + pub country: Option<String>, +} + +impl LocationDetails { + pub fn new(coords: Option<(f64, f64)>, country: Option<String>) -> Self { + Self { coords, country } + } + + pub fn coords(&self) -> &Option<(f64, f64)> { + &self.coords + } + + pub fn country(&self) -> &Option<String> { + &self.country + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_location_details_coords() { + let coords = (0.0, 0.0); + let country = "US".to_string(); + let location_details = LocationDetails::new(Some(coords), Some(country.clone())); + + assert_eq!(location_details.coords(), &Some(coords)); + } + + #[test] + fn test_location_details_country() { + let coords = (0.0, 0.0); + let country = "US".to_string(); + let location_details = LocationDetails::new(Some(coords), Some(country.clone())); + + assert_eq!(location_details.country(), &Some(country)); + } + + #[test] + fn test_location_details_eq() { + let coords = (0.0, 0.0); + let country = "US".to_string(); + let location_details = LocationDetails::new(Some(coords), Some(country.clone())); + let location_details_2 = LocationDetails::new(Some(coords), Some(country.clone())); + + assert_eq!(location_details, location_details_2); + } + + #[test] + fn test_location_details_debug() { + let coords = (0.0, 0.0); + let country = "US".to_string(); + let location_details = LocationDetails::new(Some(coords), Some(country.clone())); + + assert_eq!( + format!("{:?}", location_details), + format!( + "LocationDetails {{ coords: Some({:?}), country: Some({:?}) }}", + coords, country + ) + ); + } + + #[test] + fn test_location_details_clone() { + let coords = (0.0, 0.0); + let country = "US".to_string(); + let location_details = LocationDetails::new(Some(coords), Some(country.clone())); + let cloned_location_details = location_details.clone(); + + assert_eq!(location_details, cloned_location_details); + } + + #[test] + #[cfg(feature = "testing")] + fn test_location_serialization() { + use serde_json; + + let coords = (1.2, 3.4); + let country = "US".to_string(); + let location_details = LocationDetails::new(Some(coords), Some(country.clone())); + + let serialized = serde_json::to_string(&location_details).unwrap(); + let deserialized: LocationDetails = serde_json::from_str(&serialized).unwrap(); + + assert_eq!(location_details, deserialized); + } +} diff --git a/node-metrics/src/service/data_state/mod.rs b/node-metrics/src/service/data_state/mod.rs new file mode 100644 index 000000000..3a4dea838 --- /dev/null +++ b/node-metrics/src/service/data_state/mod.rs @@ -0,0 +1,786 @@ +pub mod location_details; +pub mod node_identity; + +use async_std::{sync::RwLock, task::JoinHandle}; +use bitvec::vec::BitVec; +use circular_buffer::CircularBuffer; +use espresso_types::{Header, Payload, SeqTypes}; +use 
futures::{channel::mpsc::SendError, Sink, SinkExt, Stream, StreamExt}; +use hotshot_query_service::{ + availability::{QueryableHeader, QueryablePayload}, + explorer::{BlockDetail, ExplorerHeader, Timestamp}, + Leaf, Resolvable, +}; +use hotshot_stake_table::vec_based::StakeTable; +use hotshot_types::{ + light_client::{CircuitField, StateVerKey}, + signature_key::BLSPubKey, + traits::{ + block_contents::BlockHeader, + stake_table::{SnapshotVersion, StakeTableScheme}, + BlockPayload, + }, +}; +pub use location_details::LocationDetails; +pub use node_identity::NodeIdentity; +use std::{collections::HashSet, iter::zip, sync::Arc}; +use time::OffsetDateTime; + +/// MAX_HISTORY represents the last N records that are stored within the +/// DataState structure for the various different sample types. +const MAX_HISTORY: usize = 50; + +/// [DataState] represents the state of the data that is being stored within +/// the service. +#[cfg_attr(test, derive(Default))] +pub struct DataState { + latest_blocks: CircularBuffer<MAX_HISTORY, BlockDetail<SeqTypes>>, + latest_voters: CircularBuffer<MAX_HISTORY, BitVec<u16>>, + stake_table: StakeTable<BLSPubKey, StateVerKey, CircuitField>, + // Do we need any other data at the moment? + node_identity: Vec<NodeIdentity>, +} + +impl DataState { + pub fn new( + latest_blocks: CircularBuffer<MAX_HISTORY, BlockDetail<SeqTypes>>, + latest_voters: CircularBuffer<MAX_HISTORY, BitVec<u16>>, + stake_table: StakeTable<BLSPubKey, StateVerKey, CircuitField>, + node_identity: Vec<NodeIdentity>, + ) -> Self { + Self { + latest_blocks, + latest_voters, + stake_table, + node_identity, + } + } + + pub fn latest_blocks(&self) -> impl Iterator<Item = &BlockDetail<SeqTypes>> { + self.latest_blocks.iter() + } + + pub fn latest_voters(&self) -> impl Iterator<Item = &BitVec<u16>> { + self.latest_voters.iter() + } + + pub fn stake_table(&self) -> &StakeTable<BLSPubKey, StateVerKey, CircuitField> { + &self.stake_table + } + + pub fn node_identity(&self) -> impl Iterator<Item = &NodeIdentity> { + self.node_identity.iter() + } + + pub fn replace_stake_table( + &mut self, + stake_table: StakeTable<BLSPubKey, StateVerKey, CircuitField>, + ) { + self.stake_table = stake_table; + + // We want to make sure that we're accounting for the node identity + // information that we have. In the case of any new public keys + // being added, we want to ensure we have an entry for them in our + // node identity list. + + let current_identity_set = self + .node_identity + .iter() + .map(|node_identity| *node_identity.public_key()) + .collect::<HashSet<_>>(); + + let stake_table_iter_result = self.stake_table.try_iter(SnapshotVersion::Head); + let stake_table_iter = match stake_table_iter_result { + Ok(into_iter) => into_iter, + Err(_) => return, + }; + + let missing_node_identity_entries = + stake_table_iter.filter(|(key, _, _)| !current_identity_set.contains(key)); + + self.node_identity.extend( + missing_node_identity_entries.map(|(key, _, _)| NodeIdentity::from_public_key(key)), + ); + } + + pub fn add_latest_block(&mut self, block: BlockDetail<SeqTypes>) { + self.latest_blocks.push_back(block); + } + + pub fn add_latest_voters(&mut self, voters: BitVec<u16>) { + self.latest_voters.push_back(voters); + } + + pub fn add_node_identity(&mut self, identity: NodeIdentity) { + // We need to check to see if this identity is already in the list, + // if it is, we will want to replace it. + + let pub_key = identity.public_key(); + + let mut matching_public_keys = self + .node_identity + .iter() + // We want the index of the entry for easier editing + .enumerate() + .filter(|(_, node_identity)| node_identity.public_key() == pub_key); + + // We only expect this to have a single entry.
+ let existing_node_identity_option = matching_public_keys.next(); + + debug_assert_eq!(matching_public_keys.next(), None); + + if let Some((index, _)) = existing_node_identity_option { + self.node_identity[index] = identity; + return; + } + + // This entry doesn't appear in our table, so let's add it. + self.node_identity.push(identity); + } +} + +/// [create_block_detail_from_leaf] is a helper function that will build a +/// [BlockDetail] from the reference to [Leaf]. +pub fn create_block_detail_from_leaf(leaf: &Leaf) -> BlockDetail { + let block_header = leaf.block_header(); + let block_payload = &leaf.block_payload().unwrap_or(Payload::empty().0); + + let transaction_iter = block_payload.iter(block_header.metadata()); + + // Calculate the number of transactions and the total payload size of the + // transactions contained within the Payload. + let (num_transactions, total_payload_size) = transaction_iter.fold( + (0u64, 0u64), + |(num_transactions, total_payload_size), tx_index| { + ( + num_transactions + 1, + total_payload_size + + block_payload + .transaction(&tx_index) + .map_or(0u64, |tx| tx.payload().len() as u64), + ) + }, + ); + + BlockDetail:: { + hash: block_header.commitment(), + height: block_header.height(), + time: Timestamp( + OffsetDateTime::from_unix_timestamp(block_header.timestamp() as i64) + .unwrap_or(OffsetDateTime::UNIX_EPOCH), + ), + proposer_id: block_header.proposer_id(), + num_transactions, + block_reward: vec![block_header.fee_info_balance().into()], + fee_recipient: block_header.fee_info_account(), + size: total_payload_size, + } +} + +/// [ProcessLeafError] represents the error that can occur when processing +/// a [Leaf]. +#[derive(Debug)] +pub enum ProcessLeafError { + BlockSendError(SendError), + VotersSendError(SendError), +} + +impl std::fmt::Display for ProcessLeafError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ProcessLeafError::BlockSendError(err) => { + write!(f, "error sending block detail to sender: {}", err) + } + ProcessLeafError::VotersSendError(err) => { + write!(f, "error sending voters to sender: {}", err) + } + } + } +} + +impl std::error::Error for ProcessLeafError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + ProcessLeafError::BlockSendError(err) => Some(err), + ProcessLeafError::VotersSendError(err) => Some(err), + } + } +} + +/// [process_incoming_leaf] is a helper function that will process an incoming +/// [Leaf] and update the [DataState] with the new information. +/// Additionally, the block that is contained within the [Leaf] will be +/// computed into a [BlockDetail] and sent to the [Sink] so that it can be +/// processed for real-time considerations. +async fn process_incoming_leaf( + leaf: Leaf, + data_state: Arc>, + mut block_sender: BDSink, + mut voters_sender: BVSink, +) -> Result<(), ProcessLeafError> +where + Header: BlockHeader + QueryableHeader + ExplorerHeader, + Payload: BlockPayload, + BDSink: Sink, Error = SendError> + Unpin, + BVSink: Sink, Error = SendError> + Unpin, +{ + let block_detail = create_block_detail_from_leaf(&leaf); + let block_detail_copy = create_block_detail_from_leaf(&leaf); + + let certificate = leaf.justify_qc(); + let signatures = &certificate.signatures; + + // Let's take a look at the quorum certificate signatures. + // It looks like all of these blocks are being decided by the + // same Quorum Certificate. + + // Where's the stake table? 
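The block that follows performs a remapping: the QC's signature bitmap is ordered by the stake table, while the service stores voters in its own node-identity order, so the keys that voted are collected into a `HashSet` and membership is re-tested in the stored order. A self-contained distillation, with `u64` standing in for `BLSPubKey`:

```rust
// Distills the voter remapping performed below: the QC bitmap is aligned
// with the stake table, but we want a bitmap aligned with our own
// node-identity list, so we go through a set of "keys that voted".
use std::collections::HashSet;

fn main() {
    let stake_table_keys = [10u64, 20, 30, 40]; // stake-table order
    let qc_bitmap = [true, false, true, false]; // aligned with stake table

    let voted: HashSet<u64> = stake_table_keys
        .iter()
        .zip(qc_bitmap)
        .filter(|(_, voted)| *voted)
        .map(|(key, _)| *key)
        .collect();

    // Our node-identity list may be in a different order, and may grow
    // independently of the stake table.
    let node_identity_keys = [40u64, 30, 20, 10];
    let remapped: Vec<bool> = node_identity_keys
        .iter()
        .map(|key| voted.contains(key))
        .collect();

    assert_eq!(remapped, vec![false, true, false, true]);
}
```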
+ let signatures = signatures.as_ref(); + + // Let's determine the voter participants in the Quorum Certificate. + + // We shouldn't ever have a BitVec that is empty, with the possible + // exception of the genesis block. + let stake_table_voters_bit_vec = signatures.map_or(Default::default(), |sig| sig.1.clone()); + + // This BitVec should be in the same order as the Stake Table. + // The StakeTable will be able to change its order between epochs, + // which means that its order can change between blocks. + // However, the BitVec is a really nice size in order for storing + // information. We should be able to remap the BitVec order from + // the StakeTable order to our installed order representation. This + // should allow us to still store as a BitVec while containing our + // own order of the voters. + // We will need to recompute these BitVecs if the node information that + // is stored shrinks instead of growing. + + let mut data_state_write_lock_guard = data_state.write().await; + + let stake_table = &data_state_write_lock_guard.stake_table; + let stake_table_entries_vec = stake_table + .try_iter(SnapshotVersion::LastEpochStart) + .map_or(vec![], |into_iter| into_iter.collect::<Vec<_>>()); + + // We have a BitVec of voters who signed the QC. + // We can use this to determine the weight of the QC. + let stake_table_entry_voter_participation_and_entries_pairs = + zip(stake_table_voters_bit_vec, stake_table_entries_vec); + let stake_table_keys_that_voted = stake_table_entry_voter_participation_and_entries_pairs + .filter(|(bit_ref, _)| *bit_ref) + .map(|(_, entry)| { + // Alright this is our entry that we care about. + // In this case, we just want to determine who voted for this + // Leaf. + + let (key, _, _): (BLSPubKey, _, _) = entry; + key + }); + + let voters_set: HashSet<BLSPubKey> = stake_table_keys_that_voted.collect(); + + let voters_bitvec = data_state_write_lock_guard.node_identity.iter().fold( + BitVec::with_capacity(data_state_write_lock_guard.node_identity.len()), + |mut acc, node_identity| { + acc.push(voters_set.contains(node_identity.public_key())); + acc + }, + ); + + data_state_write_lock_guard + .latest_blocks + .push_back(block_detail); + data_state_write_lock_guard + .latest_voters + .push_back(voters_bitvec.clone()); + + drop(data_state_write_lock_guard); + + if let Err(err) = block_sender.send(block_detail_copy).await { + // We have an error that prevents us from continuing + return Err(ProcessLeafError::BlockSendError(err)); + } + + if let Err(err) = voters_sender.send(voters_bitvec).await { + // We have an error that prevents us from continuing + return Err(ProcessLeafError::VotersSendError(err)); + } + + Ok(()) +} + +/// [ProcessLeafStreamTask] represents the task that is responsible for +/// processing a stream of incoming [Leaf]s. +pub struct ProcessLeafStreamTask { + pub task_handle: Option<JoinHandle<()>>, +} + +impl ProcessLeafStreamTask { + /// [new] creates a new [ProcessLeafStreamTask] that will process a stream + /// of incoming [Leaf]s. + /// + /// Calling this function will create an asynchronous task that will start + /// processing immediately. The handle for the task will be stored within + /// the returned structure.
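Both task wrappers in this module share the same lifecycle: spawn the worker in `new`, keep the `JoinHandle` as an `Option` so callers can `take()` and cancel it explicitly, and cancel from `Drop` otherwise. A minimal self-contained version of the pattern (using async_std, as the module does):

```rust
// Minimal version of the spawn-in-new / cancel-on-drop pattern used by
// ProcessLeafStreamTask and ProcessNodeIdentityStreamTask.
use async_std::task::JoinHandle;

struct BackgroundTask {
    task_handle: Option<JoinHandle<()>>,
}

impl BackgroundTask {
    fn new() -> Self {
        let task_handle = async_std::task::spawn(async {
            // Stand-in for the real stream-draining loop.
            async_std::future::pending::<()>().await;
        });
        Self {
            task_handle: Some(task_handle),
        }
    }
}

impl Drop for BackgroundTask {
    fn drop(&mut self) {
        // take() lets callers cancel (or await) the handle themselves
        // first; if they did, there is nothing left for Drop to do.
        if let Some(handle) = self.task_handle.take() {
            async_std::task::block_on(handle.cancel());
        }
    }
}

fn main() {
    let task = BackgroundTask::new();
    drop(task); // cancels the still-pending task instead of leaking it
}
```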
+ pub fn new( + leaf_receiver: S, + data_state: Arc>, + block_detail_sender: K1, + voters_sender: K2, + ) -> Self + where + S: Stream> + Send + Sync + Unpin + 'static, + K1: Sink, Error = SendError> + Clone + Send + Sync + Unpin + 'static, + K2: Sink, Error = SendError> + Clone + Send + Sync + Unpin + 'static, + { + let task_handle = async_std::task::spawn(Self::process_leaf_stream( + leaf_receiver, + data_state.clone(), + block_detail_sender, + voters_sender, + )); + + Self { + task_handle: Some(task_handle), + } + } + + /// [process_leaf_stream] allows for the consumption of a [Stream] when + /// attempting to process new incoming [Leaf]s. + async fn process_leaf_stream( + mut stream: S, + data_state: Arc>, + block_sender: BDSink, + voters_senders: BVSink, + ) where + S: Stream> + Unpin, + Header: BlockHeader + QueryableHeader + ExplorerHeader, + Payload: BlockPayload, + BDSink: Sink, Error = SendError> + Clone + Unpin, + BVSink: Sink, Error = SendError> + Clone + Unpin, + { + loop { + let leaf_result = stream.next().await; + let leaf = if let Some(leaf) = leaf_result { + leaf + } else { + // We have reached the end of the stream + tracing::error!("process leaf stream: end of stream reached for leaf stream."); + return; + }; + + if let Err(err) = process_incoming_leaf( + leaf, + data_state.clone(), + block_sender.clone(), + voters_senders.clone(), + ) + .await + { + // We have an error that prevents us from continuing + tracing::error!("process leaf stream: error processing leaf: {}", err); + + // At the moment, all underlying errors are due to `SendError` + // which will ultimately mean that further processing attempts + // will fail, and be fruitless. + match err { + ProcessLeafError::BlockSendError(_) => { + panic!("ProcessLeafStreamTask: process_incoming_leaf failed, underlying sink is closed, blocks will stagnate: {}", err) + } + ProcessLeafError::VotersSendError(_) => { + panic!("ProcessLeafStreamTask: process_incoming_leaf failed, underlying sink is closed, voters will stagnate: {}", err) + } + } + } + } + } +} + +/// [Drop] implementation for [ProcessLeafStreamTask] that will cancel the +/// task if it is dropped. +impl Drop for ProcessLeafStreamTask { + fn drop(&mut self) { + let task_handle = self.task_handle.take(); + if let Some(task_handle) = task_handle { + async_std::task::block_on(task_handle.cancel()); + } + } +} + +/// [ProcessNodeIdentityError] represents the error that can occur when processing +/// a [NodeIdentity]. +#[derive(Debug)] +pub enum ProcessNodeIdentityError { + SendError(SendError), +} + +impl std::fmt::Display for ProcessNodeIdentityError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ProcessNodeIdentityError::SendError(err) => { + write!(f, "error sending node identity to sender: {}", err) + } + } + } +} + +impl std::error::Error for ProcessNodeIdentityError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + ProcessNodeIdentityError::SendError(err) => Some(err), + } + } +} + +impl From for ProcessNodeIdentityError { + fn from(err: SendError) -> Self { + ProcessNodeIdentityError::SendError(err) + } +} + +/// [process_incoming_node_identity] is a helper function that will process an +/// incoming [NodeIdentity] and update the [DataState] with the new information. +/// Additionally, the [NodeIdentity] will be sent to the [Sink] so that it can +/// be processed for real-time considerations. 
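The processing loops above treat a `SendError` as fatal and panic rather than retry. The reasoning: a `futures` mpsc send only fails once the receiving side is gone, at which point every subsequent send would fail the same way, so retrying is fruitless. A self-contained sketch of that failure mode:

```rust
// Shows why the processing loops treat SendError as unrecoverable: once
// the receiver is dropped, the channel is disconnected and every further
// send fails.
use futures::{channel::mpsc, executor::block_on, SinkExt};

fn main() {
    block_on(async {
        let (mut sender, receiver) = mpsc::channel::<u64>(1);
        drop(receiver);

        let first = sender.send(1).await;
        let second = sender.send(2).await;

        assert!(first.is_err());
        assert!(second.is_err());
        assert!(first.unwrap_err().is_disconnected());
    });
}
```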
+async fn process_incoming_node_identity( + node_identity: NodeIdentity, + data_state: Arc>, + mut node_identity_sender: NISink, +) -> Result<(), ProcessNodeIdentityError> +where + NISink: Sink + Unpin, +{ + let mut data_state_write_lock_guard = data_state.write().await; + data_state_write_lock_guard.add_node_identity(node_identity.clone()); + node_identity_sender.send(node_identity).await?; + + Ok(()) +} + +/// [ProcessNodeIdentityStreamTask] represents the task that is responsible for +/// processing a stream of incoming [NodeIdentity]s and updating the [DataState] +/// with the new information. +pub struct ProcessNodeIdentityStreamTask { + pub task_handle: Option>, +} + +impl ProcessNodeIdentityStreamTask { + /// [new] creates a new [ProcessNodeIdentityStreamTask] that will process a + /// stream of incoming [NodeIdentity]s. + /// + /// Calling this function will create an asynchronous task that will start + /// processing immediately. The handle for the task will be stored within + /// the returned structure. + pub fn new( + node_identity_receiver: S, + data_state: Arc>, + node_identity_sender: K, + ) -> Self + where + S: Stream + Send + Sync + Unpin + 'static, + K: Sink + Clone + Send + Sync + Unpin + 'static, + { + let task_handle = async_std::task::spawn(Self::process_node_identity_stream( + node_identity_receiver, + data_state.clone(), + node_identity_sender, + )); + + Self { + task_handle: Some(task_handle), + } + } + + /// [process_node_identity_stream] allows for the consumption of a [Stream] when + /// attempting to process new incoming [NodeIdentity]s. + /// This function will process the incoming [NodeIdentity] and update the + /// [DataState] with the new information. + /// Additionally, the [NodeIdentity] will be sent to the [Sink] so that it can + /// be processed for real-time considerations. + async fn process_node_identity_stream( + mut stream: S, + data_state: Arc>, + node_identity_sender: NISink, + ) where + S: Stream + Unpin, + NISink: Sink + Clone + Unpin, + { + loop { + let node_identity_result = stream.next().await; + let node_identity = if let Some(node_identity) = node_identity_result { + node_identity + } else { + // We have reached the end of the stream + tracing::info!( + "process node identity stream: end of stream reached for node identity stream." + ); + return; + }; + + if let Err(err) = process_incoming_node_identity( + node_identity, + data_state.clone(), + node_identity_sender.clone(), + ) + .await + { + // We have an error that prevents us from continuing + tracing::error!( + "process node identity stream: error processing node identity: {}", + err + ); + + // The only underlying class of errors that can be returned from + // `process_incoming_node_identity` are due to `SendError` which + // will ultimately mean that further processing attempts will fail + // and be fruitless. + panic!("ProcessNodeIdentityStreamTask: process_incoming_node_identity failed, underlying sink is closed, node identities will stagnate: {}", err); + } + } + } +} + +/// [Drop] implementation for [ProcessNodeIdentityStreamTask] that will cancel +/// the task if it is dropped. 
+impl Drop for ProcessNodeIdentityStreamTask { + fn drop(&mut self) { + let task_handle = self.task_handle.take(); + if let Some(task_handle) = task_handle { + async_std::task::block_on(task_handle.cancel()); + } + } +} + +#[cfg(test)] +mod tests { + use super::{DataState, ProcessLeafStreamTask}; + use crate::service::data_state::{ + LocationDetails, NodeIdentity, ProcessNodeIdentityStreamTask, + }; + use async_std::{prelude::FutureExt, sync::RwLock}; + use espresso_types::{ + v0_3::ChainConfig, BlockMerkleTree, FeeMerkleTree, Leaf, NodeState, ValidatedState, + }; + use futures::{channel::mpsc, SinkExt, StreamExt}; + use hotshot_types::{signature_key::BLSPubKey, traits::signature_key::SignatureKey}; + use std::{sync::Arc, time::Duration}; + use url::Url; + + #[async_std::test] + async fn test_process_leaf_error_debug() { + let (mut sender, receiver) = mpsc::channel(1); + // deliberately close the receiver. + drop(receiver); + + // Attempt to receive, and we should get an error. + let receive_result = sender.send(1).await; + + assert!(receive_result.is_err()); + let err = receive_result.unwrap_err(); + + let process_leaf_err = super::ProcessLeafError::BlockSendError(err); + + assert_eq!( + format!("{:?}", process_leaf_err), + "BlockSendError(SendError { kind: Disconnected })" + ); + } + + #[async_std::test] + async fn test_process_leaf_stream() { + let data_state: DataState = Default::default(); + let data_state = Arc::new(RwLock::new(data_state)); + let (block_sender, block_receiver) = futures::channel::mpsc::channel(1); + let (voters_sender, voters_receiver) = futures::channel::mpsc::channel(1); + let (leaf_sender, leaf_receiver) = futures::channel::mpsc::channel(1); + + let mut process_leaf_stream_task_handle = ProcessLeafStreamTask::new( + leaf_receiver, + data_state.clone(), + block_sender, + voters_sender, + ); + + { + let data_state = data_state.read().await; + // Latest blocks should be empty + assert_eq!(data_state.latest_blocks().count(), 0); + // Latest voters should be empty + assert_eq!(data_state.latest_voters().count(), 0); + } + + let validated_state = ValidatedState { + block_merkle_tree: BlockMerkleTree::new(32), + fee_merkle_tree: FeeMerkleTree::new(32), + chain_config: ChainConfig::default().into(), + }; + let instance_state = NodeState::mock(); + + let sample_leaf = Leaf::genesis(&validated_state, &instance_state).await; + + let mut leaf_sender = leaf_sender; + // We should be able to send a leaf without issue + assert_eq!(leaf_sender.send(sample_leaf).await, Ok(()),); + + let mut block_receiver = block_receiver; + // We should receive a Block Detail. + + let next_block = block_receiver.next().await; + assert!(next_block.is_some()); + + let mut voters_receiver = voters_receiver; + // We should receive a BitVec of voters. + let next_voters = voters_receiver.next().await; + assert!(next_voters.is_some()); + + { + let data_state = data_state.read().await; + // Latest blocks should now have a single entry + assert_eq!(data_state.latest_blocks().count(), 1); + // Latest voters should now have a single entry + assert_eq!(data_state.latest_voters().count(), 1); + } + + // We explicitly drop these, as it should make the task clean up. 
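The cleanup sequence exercised just below leans on two behaviors documented by the earlier channel tests: dropping the last sender ends the stream, the draining task then returns, and the test bounds the wait with a timeout. Distilled into a self-contained form:

```rust
// Distills the shutdown sequence the test performs next: drop the last
// sender, the draining task sees end-of-stream and returns, and awaiting
// its handle under a timeout completes promptly.
use futures::{channel::mpsc, StreamExt};
use std::time::Duration;

fn main() {
    async_std::task::block_on(async {
        let (sender, mut receiver) = mpsc::channel::<u64>(1);

        let handle = async_std::task::spawn(async move {
            while let Some(_item) = receiver.next().await {}
        });

        drop(sender); // end of stream for the task above

        assert_eq!(
            async_std::future::timeout(Duration::from_millis(200), handle).await,
            Ok(())
        );
    });
}
```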
+ drop(block_receiver); + drop(leaf_sender); + + assert_eq!( + process_leaf_stream_task_handle + .task_handle + .take() + .unwrap() + .timeout(Duration::from_millis(200)) + .await, + Ok(()) + ); + } + + #[async_std::test] + async fn test_process_node_identity_stream() { + let data_state: DataState = Default::default(); + let data_state = Arc::new(RwLock::new(data_state)); + let (node_identity_sender_1, node_identity_receiver_1) = futures::channel::mpsc::channel(1); + let (node_identity_sender_2, node_identity_receiver_2) = futures::channel::mpsc::channel(1); + + let mut process_node_identity_task_handle = ProcessNodeIdentityStreamTask::new( + node_identity_receiver_1, + data_state.clone(), + node_identity_sender_2, + ); + + { + let data_state = data_state.read().await; + // Node identities should be empty + assert_eq!(data_state.node_identity().count(), 0); + } + + // Send a node update to the Stream + let public_key_1 = BLSPubKey::generated_from_seed_indexed([0; 32], 0).0; + let node_identity_1 = NodeIdentity::from_public_key(public_key_1); + + let mut node_identity_sender_1 = node_identity_sender_1; + let mut node_identity_receiver_2 = node_identity_receiver_2; + + assert_eq!( + node_identity_sender_1.send(node_identity_1.clone()).await, + Ok(()) + ); + + assert_eq!( + node_identity_receiver_2.next().await, + Some(node_identity_1.clone()) + ); + + { + let data_state = data_state.read().await; + // Node identities should now have a single entry + assert_eq!(data_state.node_identity().count(), 1); + assert_eq!(data_state.node_identity().next(), Some(&node_identity_1)); + } + + // If we send the same node identity again, we should not have a new entry. + assert_eq!( + node_identity_sender_1.send(node_identity_1.clone()).await, + Ok(()) + ); + + assert_eq!( + node_identity_receiver_2.next().await, + Some(node_identity_1.clone()) + ); + + { + let data_state = data_state.read().await; + // Node identities should still have a single entry + assert_eq!(data_state.node_identity().count(), 1); + assert_eq!(data_state.node_identity().next(), Some(&node_identity_1)); + } + + // If we send an update for that node instead, it should update the + // entry.
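The update path exercised next depends on `add_node_identity` behaving as an upsert keyed on the public key: scan the `Vec`, replace in place on a match, append otherwise. Using a `Vec` rather than a map keeps insertion order stable, which the voter bitmap indexing relies on. A toy version of the same logic:

```rust
// Toy version of DataState::add_node_identity's upsert: replace the entry
// whose key matches, otherwise append.
#[derive(Debug, Clone, PartialEq)]
struct Identity {
    key: u64,
    name: Option<String>,
}

fn add_identity(list: &mut Vec<Identity>, identity: Identity) {
    if let Some(existing) = list.iter_mut().find(|e| e.key == identity.key) {
        *existing = identity;
    } else {
        list.push(identity);
    }
}

fn main() {
    let mut list = Vec::new();
    add_identity(&mut list, Identity { key: 1, name: None });
    add_identity(&mut list, Identity { key: 1, name: Some("named".into()) });
    add_identity(&mut list, Identity { key: 2, name: None });

    assert_eq!(list.len(), 2); // key 1 was updated in place, not duplicated
    assert_eq!(list[0].name.as_deref(), Some("named"));
}
```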
+ let node_identity_1 = NodeIdentity::new( + public_key_1, + Some("name".to_string()), + Some(Url::parse("https://example.com/").unwrap()), + Some("company".to_string()), + Some(Url::parse("https://example.com/").unwrap()), + Some(LocationDetails::new( + Some((40.7128, -74.0060)), + Some("US".to_string()), + )), + Some("operating_system".to_string()), + Some("node_type".to_string()), + Some("network_type".to_string()), + ); + assert_eq!( + node_identity_sender_1.send(node_identity_1.clone()).await, + Ok(()) + ); + + assert_eq!( + node_identity_receiver_2.next().await, + Some(node_identity_1.clone()) + ); + + { + let data_state = data_state.read().await; + // Node identities should still have a single entry + assert_eq!(data_state.node_identity().count(), 1); + assert_eq!(data_state.node_identity().next(), Some(&node_identity_1)); + } + + // If we send a new node identity, it should result in a new entry + + let public_key_2 = BLSPubKey::generated_from_seed_indexed([0; 32], 1).0; + let node_identity_2 = NodeIdentity::from_public_key(public_key_2); + + assert_eq!( + node_identity_sender_1.send(node_identity_2.clone()).await, + Ok(()) + ); + + assert_eq!( + node_identity_receiver_2.next().await, + Some(node_identity_2.clone()) + ); + + { + let data_state = data_state.read().await; + // Node identities should now have two entries + assert_eq!(data_state.node_identity().count(), 2); + assert_eq!(data_state.node_identity().next(), Some(&node_identity_1)); + assert_eq!(data_state.node_identity().last(), Some(&node_identity_2)); + } + + // We explicitly drop these, as it should make the task clean up. + drop(node_identity_sender_1); + + if let Some(process_node_identity_task_handle) = + process_node_identity_task_handle.task_handle.take() + { + assert_eq!(process_node_identity_task_handle.cancel().await, None); + } + } +} diff --git a/node-metrics/src/service/data_state/node_identity.rs b/node-metrics/src/service/data_state/node_identity.rs new file mode 100644 index 000000000..8396a8134 --- /dev/null +++ b/node-metrics/src/service/data_state/node_identity.rs @@ -0,0 +1,259 @@ +use super::LocationDetails; +use hotshot_types::signature_key::BLSPubKey; +use serde::{Deserialize, Serialize}; +use surf_disco::Url; + +/// [NodeIdentity] represents the identity of the node that is participating +/// in the network. +#[derive(Clone, PartialEq, Debug, Serialize, Deserialize)] +pub struct NodeIdentity { + pub(crate) public_key: BLSPubKey, + pub(crate) name: Option<String>, + pub(crate) public_url: Option<Url>, + pub(crate) company: Option<String>, + pub(crate) company_website: Option<Url>, + pub(crate) location: Option<LocationDetails>, + pub(crate) operating_system: Option<String>, + + /// node_type is meant to reflect the type of the node that is being + /// run. The simplest representation of this value is the specific + /// binary program that is running for the node. In the case of the + /// Espresso sequencer, this is expected to be the value: + /// "espresso-sequencer <version>". + /// + /// Other implementations may use their own values instead. + pub(crate) node_type: Option<String>, + + /// network_type is meant to represent the type of network that the node is + /// connected to. The sample specification has the following values + /// suggested: + /// - residential + /// - hosting + /// + /// It is preferred to have some preset values we would like for these + /// to be, but for flexibility it is set to be a generic String.
+ /// Proposed values: + /// - Residential + /// - AWS + /// - Azure + /// - GCP + /// + /// These could also potentially include the availability zone for the + /// hosted networks: + /// - AWS (us-east-1) + /// + /// This could potentially even be: + /// - AWS (us-east-1a) + pub(crate) network_type: Option<String>, +} + +impl NodeIdentity { + #[allow(clippy::too_many_arguments)] + pub fn new( + public_key: BLSPubKey, + name: Option<String>, + public_url: Option<Url>, + company: Option<String>, + company_website: Option<Url>, + location: Option<LocationDetails>, + operating_system: Option<String>, + node_type: Option<String>, + network_type: Option<String>, + ) -> Self { + Self { + public_key, + name, + public_url, + company, + company_website, + location, + operating_system, + node_type, + network_type, + } + } + + pub fn public_key(&self) -> &BLSPubKey { + &self.public_key + } + + pub fn name(&self) -> &Option<String> { + &self.name + } + + pub fn public_url(&self) -> &Option<Url> { + &self.public_url + } + + pub fn company(&self) -> &Option<String> { + &self.company + } + + pub fn company_website(&self) -> &Option<Url> { + &self.company_website + } + + pub fn location(&self) -> Option<&LocationDetails> { + self.location.as_ref() + } + + pub fn operating_system(&self) -> &Option<String> { + &self.operating_system + } + + pub fn node_type(&self) -> &Option<String> { + &self.node_type + } + + pub fn network_type(&self) -> &Option<String> { + &self.network_type + } + + pub fn from_public_key(public_key: BLSPubKey) -> Self { + Self { + public_key, + name: None, + public_url: None, + company: None, + company_website: None, + location: None, + operating_system: None, + node_type: None, + network_type: None, + } + } +} + +#[cfg(test)] +pub mod tests { + use super::LocationDetails; + use super::NodeIdentity; + use hotshot_types::signature_key::BLSPubKey; + use hotshot_types::traits::signature_key::SignatureKey; + + pub fn create_test_node(index: u64) -> NodeIdentity { + let (pub_key, _) = BLSPubKey::generated_from_seed_indexed([0; 32], index); + + NodeIdentity::new( + pub_key, + Some("a".to_string()), + Some("https://espressosys.com/".parse().unwrap()), + Some("company".to_string()), + Some("https://example.com/".parse().unwrap()), + Some(LocationDetails::new( + Some((0.0, 0.0)), + Some("US".to_string()), + )), + Some("Windows 11".to_string()), + Some("espresso".to_string()), + Some("residential".to_string()), + ) + } + + #[test] + fn test_node_identity_eq() { + let node_identity_1 = create_test_node(1); + let node_identity_2 = create_test_node(1); + let node_identity_3 = create_test_node(2); + + assert_eq!(node_identity_1, node_identity_2); + assert_ne!(node_identity_1, node_identity_3); + assert_ne!(node_identity_2, node_identity_3); + } + + #[test] + fn test_node_identity_eq_clone() { + let node_identity_1 = create_test_node(1); + let node_identity_2 = node_identity_1.clone(); + + assert_eq!(node_identity_1, node_identity_2); + } + + #[test] + #[cfg(feature = "testing")] + fn test_node_identity_serialize() { + use serde_json; + + let node_identity = create_test_node(1); + let serialized = serde_json::to_string(&node_identity).unwrap(); + let deserialized: NodeIdentity = serde_json::from_str(&serialized).unwrap(); + + assert_eq!(node_identity, deserialized); + } + + #[test] + fn test_node_identity_public_key() { + let node_identity = create_test_node(1); + let public_key = node_identity.public_key(); + + assert_eq!( + public_key, + &BLSPubKey::generated_from_seed_indexed([0; 32], 1).0 + ); + } + + #[test] + fn test_node_identity_name() { + let node_identity = create_test_node(1); + let name = node_identity.name();
+ + assert_eq!(name, &Some("a".to_string())); + } + + #[test] + fn test_node_identity_public_url() { + let node_identity = create_test_node(1); + let public_url = node_identity.public_url(); + + assert_eq!( + public_url, + &Some("https://espressosys.com/".parse().unwrap()), + ); + } + + #[test] + fn test_node_identity_company() { + let node_identity = create_test_node(1); + let company = node_identity.company(); + + assert_eq!(company, &Some("company".to_string())); + } + + #[test] + fn test_node_identity_location() { + let node_identity = create_test_node(1); + let location = node_identity.location(); + + assert_eq!( + location, + Some(&LocationDetails::new( + Some((0.0, 0.0)), + Some("US".to_string()) + )) + ); + } + + #[test] + fn test_node_identity_operating_system() { + let node_identity = create_test_node(1); + let operating_system = node_identity.operating_system(); + + assert_eq!(operating_system, &Some("Windows 11".to_string())); + } + + #[test] + fn test_node_identity_node_type() { + let node_identity = create_test_node(1); + let node_type = node_identity.node_type(); + + assert_eq!(node_type, &Some("espresso".to_string())); + } + + #[test] + fn test_node_identity_network_type() { + let node_identity = create_test_node(1); + let network_type = node_identity.network_type(); + + assert_eq!(network_type, &Some("residential".to_string())); + } +} diff --git a/node-metrics/src/service/mod.rs b/node-metrics/src/service/mod.rs new file mode 100644 index 000000000..90b95b953 --- /dev/null +++ b/node-metrics/src/service/mod.rs @@ -0,0 +1,6 @@ +pub mod client_id; +pub mod client_message; +pub mod client_state; +pub mod data_state; +pub mod node_type; +pub mod server_message; diff --git a/node-metrics/src/service/node_type/mod.rs b/node-metrics/src/service/node_type/mod.rs new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/node-metrics/src/service/node_type/mod.rs @@ -0,0 +1 @@ + diff --git a/node-metrics/src/service/server_message/mod.rs b/node-metrics/src/service/server_message/mod.rs new file mode 100644 index 000000000..9cb7cd798 --- /dev/null +++ b/node-metrics/src/service/server_message/mod.rs @@ -0,0 +1,62 @@ +use std::sync::Arc; + +use super::{client_id::ClientId, data_state::NodeIdentity}; +use bitvec::vec::BitVec; +use espresso_types::SeqTypes; +use hotshot_query_service::explorer::{BlockDetail, ExplorerHistograms}; +use serde::{Deserialize, Serialize}; + +/// [ServerMessage] represents the messages that the server can send to the +/// client for a response. +#[derive(Debug, Serialize, Deserialize)] +pub enum ServerMessage { + /// This allows the client to know what client_id they have been assigned + YouAre(ClientId), + + /// LatestBlock is a message that is meant to show the most recent block + /// that has arrived. + LatestBlock(Arc<BlockDetail<SeqTypes>>), + + /// LatestNodeIdentity is a message that is meant to show the most recent + /// node identity that has arrived. + LatestNodeIdentity(Arc<NodeIdentity>), + + /// LatestVoters is a message that is meant to show the most recent + /// voters that have arrived. + LatestVoters(BitVec<u16>), + + /// BlocksSnapshot is a message that is sent in response to a request for + /// the snapshot of block information that is available. + BlocksSnapshot(Arc<Vec<BlockDetail<SeqTypes>>>), + + /// NodeIdentitySnapshot is a message that is sent in response to a request + /// for the snapshot of the current node identity information.
+ NodeIdentitySnapshot(Arc<Vec<NodeIdentity>>), + + /// HistogramSnapshot is a message that is sent in response to a request + /// for the snapshot of the current histogram information. + HistogramSnapshot(Arc<ExplorerHistograms>), + + /// VotersSnapshot is a message that is sent in response to a request for + /// the snapshot of the current voters information. + VotersSnapshot(Arc<Vec<BitVec<u16>>>), +} + +impl PartialEq for ServerMessage { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Self::YouAre(lhs), Self::YouAre(rhs)) => lhs == rhs, + (Self::LatestBlock(lhs), Self::LatestBlock(rhs)) => lhs == rhs, + (Self::LatestNodeIdentity(lhs), Self::LatestNodeIdentity(rhs)) => lhs == rhs, + (Self::LatestVoters(lhs), Self::LatestVoters(rhs)) => lhs == rhs, + (Self::BlocksSnapshot(lhs), Self::BlocksSnapshot(rhs)) => lhs == rhs, + (Self::NodeIdentitySnapshot(lhs), Self::NodeIdentitySnapshot(rhs)) => lhs == rhs, + // ExplorerHistograms does not implement PartialEq, so two + // HistogramSnapshot messages are never considered equal. + (Self::HistogramSnapshot(_), Self::HistogramSnapshot(_)) => false, + (Self::VotersSnapshot(lhs), Self::VotersSnapshot(rhs)) => lhs == rhs, + _ => false, + } + } +} + +#[cfg(test)] +mod tests {} diff --git a/process-compose.yaml b/process-compose.yaml index 2bbb184bf..0dc060042 100644 --- a/process-compose.yaml +++ b/process-compose.yaml @@ -117,6 +117,16 @@ processes: - ESPRESSO_SEQUENCER_PRIVATE_STAKING_KEY=$ESPRESSO_DEMO_SEQUENCER_STAKING_PRIVATE_KEY_0 - ESPRESSO_SEQUENCER_PRIVATE_STATE_KEY=$ESPRESSO_DEMO_SEQUENCER_STATE_PRIVATE_KEY_0 - ESPRESSO_SEQUENCER_IS_DA=true + - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer0 + - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000000 + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_WEBSITE=https://www.espressosys.com/ + - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=Linux 5.15.153.1 + - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local + - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=US + - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=40.7128 + - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=-74.0060 + - ESPRESSO_SEQUENCER_PUBLIC_API_URL=http://localhost:$ESPRESSO_SEQUENCER_API_PORT/ depends_on: orchestrator: condition: process_healthy @@ -159,6 +169,16 @@ - ESPRESSO_SEQUENCER_PRIVATE_STAKING_KEY=$ESPRESSO_DEMO_SEQUENCER_STAKING_PRIVATE_KEY_1 - ESPRESSO_SEQUENCER_PRIVATE_STATE_KEY=$ESPRESSO_DEMO_SEQUENCER_STATE_PRIVATE_KEY_1 - ESPRESSO_SEQUENCER_IS_DA=true + - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer1 + - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000001 + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_WEBSITE=https://www.espressosys.com/ + - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=Darwin 23.5.0 + - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local + - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=GR + - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=39.0742 + - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=21.8243 + - ESPRESSO_SEQUENCER_PUBLIC_API_URL=http://localhost:$ESPRESSO_SEQUENCER1_API_PORT/ depends_on: orchestrator: condition: process_healthy @@ -195,6 +215,16 @@ - ESPRESSO_SEQUENCER_PRIVATE_STAKING_KEY=$ESPRESSO_DEMO_SEQUENCER_STAKING_PRIVATE_KEY_2 - ESPRESSO_SEQUENCER_PRIVATE_STATE_KEY=$ESPRESSO_DEMO_SEQUENCER_STATE_PRIVATE_KEY_2 - ESPRESSO_SEQUENCER_IS_DA=true + - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer2 + - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000002 + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - 
ESPRESSO_SEQUENCER_IDENTITY_COMPANY_WEBSITE=https://www.espressosys.com/ + - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=Darwin 23.5.0 + - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local + - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=CN + - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=35.8617 + - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=104.1954 + - ESPRESSO_SEQUENCER_PUBLIC_API_URL=http://localhost:$ESPRESSO_SEQUENCER2_API_PORT/ depends_on: orchestrator: condition: process_healthy @@ -228,6 +258,17 @@ processes: - ESPRESSO_SEQUENCER_STORAGE_PATH=$ESPRESSO_BASE_STORAGE_PATH/seq3 - ESPRESSO_SEQUENCER_PRIVATE_STAKING_KEY=$ESPRESSO_DEMO_SEQUENCER_STAKING_PRIVATE_KEY_3 - ESPRESSO_SEQUENCER_PRIVATE_STATE_KEY=$ESPRESSO_DEMO_SEQUENCER_STATE_PRIVATE_KEY_3 + - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer3 + - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000003 + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_WEBSITE=https://www.espressosys.com/ + - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=Microsoft Windows NT 10.0.22621.0 + - ESPRESSO_SEQUENCER_IDENTITY_E=espresso-sequencer@0.1.0 + - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local + - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=CN + - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=35.8617 + - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=104.1954 + - ESPRESSO_SEQUENCER_PUBLIC_API_URL=http://localhost:$ESPRESSO_SEQUENCER3_API_PORT/ depends_on: orchestrator: condition: process_healthy @@ -259,6 +300,16 @@ processes: - ESPRESSO_SEQUENCER_STORAGE_PATH=$ESPRESSO_BASE_STORAGE_PATH/seq4 - ESPRESSO_SEQUENCER_PRIVATE_STAKING_KEY=$ESPRESSO_DEMO_SEQUENCER_STAKING_PRIVATE_KEY_4 - ESPRESSO_SEQUENCER_PRIVATE_STATE_KEY=$ESPRESSO_DEMO_SEQUENCER_STATE_PRIVATE_KEY_4 + - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer4 + - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000004 + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_WEBSITE=https://www.espressosys.com/ + - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=TempleOS 5.03 + - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local + - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=AU + - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=-25.2744 + - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=133.7751 + - ESPRESSO_SEQUENCER_PUBLIC_API_URL=http://localhost:$ESPRESSO_SEQUENCER4_API_PORT/ depends_on: orchestrator: condition: process_healthy @@ -282,6 +333,36 @@ processes: path: /healthcheck failure_threshold: 100 + node_validator: + command: node-metrics -- + environment: + - ESPRESSO_NODE_VALIDATOR_STAKE_TABLE_SOURCE_BASE_URL=http://localhost:$ESPRESSO_SEQUENCER_API_PORT/v0/ + - ESPRESSO_NODE_VALIDATOR_LEAF_STREAM_SOURCE_BASE_URL=http://localhost:$ESPRESSO_SEQUENCER_API_PORT/v0/ + - ESPRESSO_NODE_VALIDATOR_INITIAL_NODE_PUBLIC_BASE_URLS=http://localhost:$ESPRESSO_SEQUENCER_API_PORT,http://localhost:$ESPRESSO_SEQUENCER1_API_PORT,http://localhost:$ESPRESSO_SEQUENCER2_API_PORT,http://localhost:$ESPRESSO_SEQUENCER3_API_PORT,http://localhost:$ESPRESSO_SEQUENCER4_API_PORT + depends_on: + broker_0: + condition: process_healthy + broker_1: + condition: process_healthy + sequencer0: + condition: process_healthy + sequencer1: + condition: process_healthy + sequencer2: + condition: process_healthy + sequencer3: + condition: process_healthy + sequencer4: + condition: process_healthy + readiness_probe: + http_get: + scheme: http + host: localhost + port: $ESPRESSO_SEQUENCER1_API_PORT + path: /healthcheck + failure_threshold: 
100 + + # We use KeyDB (a Redis variant) to maintain consistency between # different parts of the CDN # Cheating a bit here too, but KeyDB is not available as a Nix package. diff --git a/scripts/build-docker-images b/scripts/build-docker-images index 072494efb..806621c46 100755 --- a/scripts/build-docker-images +++ b/scripts/build-docker-images @@ -62,4 +62,5 @@ docker build -t ghcr.io/espressosystems/espresso-sequencer/builder:main -f docke docker build -t ghcr.io/espressosystems/espresso-sequencer/nasty-client:main -f docker/nasty-client.Dockerfile ${WORKDIR} docker build -t ghcr.io/espressosystems/espresso-sequencer/espresso-dev-node:main -f docker/espresso-dev-node.Dockerfile ${WORKDIR} docker build -t ghcr.io/espressosystems/espresso-sequencer/bridge:main -f docker/espresso-bridge.Dockerfile ${WORKDIR} -docker build -t ghcr.io/espressosystems/espresso-sequencer/marketplace-solver:main -f docker/marketplace-solver.Dockerfile ${WORKDIR} \ No newline at end of file +docker build -t ghcr.io/espressosystems/espresso-sequencer/marketplace-solver:main -f docker/marketplace-solver.Dockerfile ${WORKDIR} +docker build -t ghcr.io/espressosystems/espresso-sequencer/node-validator:main -f docker/node-validator.Dockerfile ${WORKDIR} \ No newline at end of file diff --git a/scripts/build-docker-images-native b/scripts/build-docker-images-native index 6037ce137..ed409a68f 100755 --- a/scripts/build-docker-images-native +++ b/scripts/build-docker-images-native @@ -118,3 +118,4 @@ docker build --platform $PLATFORM -t ghcr.io/espressosystems/espresso-sequencer/ docker build --platform $PLATFORM -t ghcr.io/espressosystems/espresso-sequencer/espresso-dev-node:main -f docker/espresso-dev-node.Dockerfile ${WORKDIR} docker build --platform $PLATFORM -t ghcr.io/espressosystems/espresso-sequencer/bridge:main -f docker/espresso-bridge.Dockerfile ${WORKDIR} docker build --platform $PLATFORM -t ghcr.io/espressosystems/espresso-sequencer/marketplace-solver:main -f docker/marketplace-solver.Dockerfile ${WORKDIR} +docker build --platform $PLATFORM -t ghcr.io/espressosystems/espresso-sequencer/node-validator:main -f docker/node-validator.Dockerfile ${WORKDIR} diff --git a/sequencer/src/context.rs b/sequencer/src/context.rs index c0983d836..9b2e18c55 100644 --- a/sequencer/src/context.rs +++ b/sequencer/src/context.rs @@ -15,7 +15,7 @@ use futures::{ }; use hotshot::{ traits::election::static_committee::GeneralStaticCommittee, - types::{Event, SystemContextHandle}, + types::{Event, EventType, SystemContextHandle}, Memberships, SystemContext, }; use hotshot_events_service::events_source::{EventConsumer, EventsStreamer}; @@ -34,7 +34,11 @@ use hotshot_types::{ use url::Url; use vbs::version::StaticVersionType; -use crate::{state_signature::StateSigner, static_stake_table_commitment, Node, SeqTypes}; +use crate::{ + external_event_handler::{self, ExternalEventHandler}, + state_signature::StateSigner, + static_stake_table_commitment, Node, SeqTypes, +}; /// The consensus handle pub type Consensus = SystemContextHandle>; @@ -83,6 +87,7 @@ impl, P: SequencerPersistence, Ver: StaticVersionTyp state_relay_server: Option, metrics: &dyn Metrics, stake_table_capacity: u64, + public_api_url: Option, _: Ver, ) -> anyhow::Result { let config = &network_config.config; @@ -141,7 +146,7 @@ impl, P: SequencerPersistence, Ver: StaticVersionTyp instance_state.node_id, config.clone(), memberships, - network, + network.clone(), initializer, ConsensusMetricsValue::new(metrics), persistence.clone(), @@ -155,10 +160,18 @@ impl, P: 
SequencerPersistence, Ver: StaticVersionTyp state_signer = state_signer.with_relay_server(url); } + // Create the roll call info we will be using + let roll_call_info = external_event_handler::RollCallInfo { public_api_url }; + + // Create the external event handler + let external_event_handler = ExternalEventHandler::new(network, roll_call_info, pub_key) + .with_context(|| "Failed to create external event handler")?; + Ok(Self::new( handle, persistence, state_signer, + external_event_handler, event_streamer, instance_state, network_config, @@ -170,6 +183,7 @@ impl, P: SequencerPersistence, Ver: StaticVersionTyp handle: Consensus, persistence: Arc>, state_signer: StateSigner, + external_event_handler: ExternalEventHandler, event_streamer: Arc>>, node_state: NodeState, config: NetworkConfig, @@ -192,6 +206,7 @@ impl, P: SequencerPersistence, Ver: StaticVersionTyp events, persistence, ctx.state_signer.clone(), + external_event_handler, Some(event_streamer.clone()), ), ); @@ -318,6 +333,7 @@ async fn handle_events( mut events: impl Stream> + Unpin, persistence: Arc>, state_signer: Arc>, + external_event_handler: ExternalEventHandler, events_streamer: Option>>>, ) { while let Some(event) = events.next().await { @@ -331,6 +347,13 @@ async fn handle_events( // Generate state signature. state_signer.handle_event(&event).await; + // Handle external messages + if let EventType::ExternalMessageReceived(external_message_bytes) = &event.event { + if let Err(err) = external_event_handler.handle_event(external_message_bytes) { + tracing::warn!("Failed to handle external message: {:?}", err); + }; + } + // Send the event via the event streaming service if let Some(events_streamer) = events_streamer.as_ref() { events_streamer.write().await.handle_event(event).await; diff --git a/sequencer/src/external_event_handler.rs b/sequencer/src/external_event_handler.rs new file mode 100644 index 000000000..5fa177da0 --- /dev/null +++ b/sequencer/src/external_event_handler.rs @@ -0,0 +1,181 @@ +//! 
Should probably rename this to "external" or something + +use crate::context::TaskList; +use anyhow::{Context, Result}; +use async_compatibility_layer::channel::{Receiver, Sender}; +use espresso_types::{PubKey, SeqTypes}; +use hotshot::types::{BLSPubKey, Message}; +use hotshot_types::{ + message::{MessageKind, VersionedMessage}, + traits::network::{BroadcastDelay, ConnectedNetwork, Topic}, +}; +use serde::{Deserialize, Serialize}; +use std::sync::Arc; +use url::Url; + +/// An external message that can be sent to or received from a node +#[derive(Debug, Serialize, Deserialize, Clone)] +pub enum ExternalMessage { + /// A request for a node to respond with its identifier + /// Contains the public key of the node that is requesting the roll call + RollCallRequest(BLSPubKey), + + /// A response to a roll call request + /// Contains the identifier of the node + RollCallResponse(RollCallInfo), +} + +/// Information about a node that is used in a roll call response +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RollCallInfo { + // The public API URL of the node + pub public_api_url: Option, +} + +/// The external event handler state +pub struct ExternalEventHandler { + // The `RollCallInfo` of the node (used in the roll call response) + pub roll_call_info: RollCallInfo, + + // The public key of the node + pub public_key: BLSPubKey, + + // The tasks that are running + pub _tasks: TaskList, + + // The outbound message queue + pub outbound_message_sender: Sender, +} + +// The different types of outbound messages (broadcast or direct) +#[derive(Debug)] +pub enum OutboundMessage { + Direct(Vec, PubKey), + Broadcast(Vec), +} + +impl ExternalEventHandler { + /// Creates a new `ExternalEventHandler` with the given network and roll call info + pub fn new>( + network: Arc, + roll_call_info: RollCallInfo, + public_key: BLSPubKey, + ) -> Result { + // Create the outbound message queue + let (outbound_message_sender, outbound_message_receiver) = + async_compatibility_layer::channel::bounded(10); + + let mut tasks: TaskList = Default::default(); + + // Spawn the outbound message handling loop + tasks.spawn( + "ExternalEventHandler (RollCall)", + Self::outbound_message_loop(outbound_message_receiver, network), + ); + + // We just started, so queue an outbound RollCall message (if we have a public API URL) + if roll_call_info.public_api_url.is_some() { + let roll_call_message_bytes = + Self::create_roll_call_response(&public_key, &roll_call_info) + .with_context(|| "Failed to create roll call response for initial broadcast")?; + + outbound_message_sender + .try_send(OutboundMessage::Broadcast(roll_call_message_bytes)) + .with_context(|| "External outbound message queue is somehow full")?; + } + + Ok(Self { + roll_call_info, + public_key, + _tasks: tasks, + outbound_message_sender, + }) + } + + /// Handles an event + /// + /// # Errors + /// If the message type is unknown or if there is an error serializing or deserializing the message + pub fn handle_event(&self, external_message_bytes: &[u8]) -> Result<()> { + // Deserialize the external message + let external_message = bincode::deserialize(external_message_bytes) + .with_context(|| "Failed to deserialize external message")?; + + // Match the type + match external_message { + ExternalMessage::RollCallRequest(pub_key) => { + if self.roll_call_info.public_api_url.is_none() { + // We don't have a public API URL, so we can't respond to the roll call + return Ok(()); + } + + let response_bytes = + Self::create_roll_call_response(&self.public_key, 
&self.roll_call_info) + .with_context(|| { + "Failed to serialize roll call response for RollCallRequest" + })?; + + // Send the response + self.outbound_message_sender + .try_send(OutboundMessage::Direct(response_bytes, pub_key)) + .with_context(|| "External outbound message queue is full")?; + } + + _ => { + return Err(anyhow::anyhow!("Unknown external message type")); + } + } + Ok(()) + } + + /// Creates a roll call response message + fn create_roll_call_response( + public_key: &BLSPubKey, + roll_call_info: &RollCallInfo, + ) -> Result> { + let response = ExternalMessage::RollCallResponse(roll_call_info.clone()); + + // Serialize the response + let response_bytes = bincode::serialize(&response) + .with_context(|| "Failed to serialize roll call response")?; + + let message = Message:: { + sender: *public_key, + kind: MessageKind::::External(response_bytes), + }; + + let response_bytes = + as VersionedMessage>::serialize(&message, &None) + .with_context(|| "Failed to serialize roll call response")?; + + Ok(response_bytes) + } + + /// The main loop for sending outbound messages. + async fn outbound_message_loop>( + mut receiver: Receiver, + network: Arc, + ) { + while let Ok(message) = receiver.recv().await { + // Match the message type + match message { + OutboundMessage::Direct(message, recipient) => { + // Send the message directly to the recipient + if let Err(err) = network.direct_message(message, recipient).await { + tracing::error!("Failed to send message: {:?}", err); + }; + } + + OutboundMessage::Broadcast(message) => { + // Broadcast the message to the global topic + if let Err(err) = network + .broadcast_message(message, Topic::Global, BroadcastDelay::None) + .await + { + tracing::error!("Failed to broadcast message: {:?}", err); + }; + } + } + } + } +} diff --git a/sequencer/src/lib.rs b/sequencer/src/lib.rs index 409d28be3..8ba94f06f 100644 --- a/sequencer/src/lib.rs +++ b/sequencer/src/lib.rs @@ -3,6 +3,7 @@ pub mod catchup; pub mod context; pub mod genesis; +mod external_event_handler; pub mod hotshot_commitment; pub mod options; pub mod state_signature; @@ -22,6 +23,7 @@ use hotshot_example_types::auction_results_provider_types::TestAuctionResultsPro // Should move `STAKE_TABLE_CAPACITY` in the sequencer repo when we have variate stake table support use libp2p::Multiaddr; use network::libp2p::split_off_peer_id; +use options::Identity; use state_signature::static_stake_table_commitment; use url::Url; pub mod persistence; @@ -104,6 +106,8 @@ pub struct NetworkParams { pub state_peers: Vec, pub config_peers: Option>, pub catchup_backoff: BackoffParams, + /// The address to advertise as our public API's URL + pub public_api_url: Option, /// The address to send to other Libp2p nodes to contact us pub libp2p_advertise_address: SocketAddr, @@ -119,6 +123,7 @@ pub struct L1Params { pub events_max_block_range: u64, } +#[allow(clippy::too_many_arguments)] pub async fn init_node( genesis: Genesis, network_params: NetworkParams, @@ -127,6 +132,7 @@ pub async fn init_node( l1_params: L1Params, bind_version: Ver, is_da: bool, + identity: Identity, ) -> anyhow::Result> { // Expose git information via status API. 
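Stepping back to external_event_handler.rs above: the diff only shows the response side of the roll call. The request side, which a watcher such as the node validator would presumably broadcast, is the mirror image. A sketch under the assumption that requests use the same bincode-plus-versioned-Message envelope as `create_roll_call_response`; `create_roll_call_request` is a hypothetical helper, not part of this diff, and it leans on the same imports as external_event_handler.rs:

```rust
use anyhow::{Context, Result};
use espresso_types::SeqTypes;
use hotshot::types::{BLSPubKey, Message};
use hotshot_types::message::{MessageKind, VersionedMessage};
// ExternalMessage comes from external_event_handler.rs above.

/// Hypothetical request-side counterpart to create_roll_call_response:
/// builds the bytes a watcher would broadcast to ask every node to
/// identify itself.
fn create_roll_call_request(public_key: &BLSPubKey) -> Result<Vec<u8>> {
    let request = ExternalMessage::RollCallRequest(*public_key);

    // Serialize the inner external message.
    let request_bytes = bincode::serialize(&request)
        .with_context(|| "Failed to serialize roll call request")?;

    // Wrap it in the external message kind, as the response path does.
    let message = Message::<SeqTypes> {
        sender: *public_key,
        kind: MessageKind::<SeqTypes>::External(request_bytes),
    };

    <Message<SeqTypes> as VersionedMessage<SeqTypes>>::serialize(&message, &None)
        .with_context(|| "Failed to serialize roll call request message")
}
```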
diff --git a/sequencer/src/lib.rs b/sequencer/src/lib.rs
index 409d28be3..8ba94f06f 100644
--- a/sequencer/src/lib.rs
+++ b/sequencer/src/lib.rs
@@ -3,6 +3,7 @@
 pub mod catchup;
 pub mod context;
 pub mod genesis;
+mod external_event_handler;
 pub mod hotshot_commitment;
 pub mod options;
 pub mod state_signature;
@@ -22,6 +23,7 @@ use hotshot_example_types::auction_results_provider_types::TestAuctionResultsProvider;
 // Should move `STAKE_TABLE_CAPACITY` in the sequencer repo when we have variate stake table support
 use libp2p::Multiaddr;
 use network::libp2p::split_off_peer_id;
+use options::Identity;
 use state_signature::static_stake_table_commitment;
 use url::Url;
 pub mod persistence;
@@ -104,6 +106,8 @@ pub struct NetworkParams {
     pub state_peers: Vec<Url>,
     pub config_peers: Option<Vec<Url>>,
     pub catchup_backoff: BackoffParams,
+    /// The address to advertise as our public API's URL
+    pub public_api_url: Option<Url>,
 
     /// The address to send to other Libp2p nodes to contact us
     pub libp2p_advertise_address: SocketAddr,
@@ -119,6 +123,7 @@ pub struct L1Params {
     pub events_max_block_range: u64,
 }
 
+#[allow(clippy::too_many_arguments)]
 pub async fn init_node<P: PersistenceOptions, Ver: StaticVersionType + 'static>(
     genesis: Genesis,
     network_params: NetworkParams,
@@ -127,6 +132,7 @@
     l1_params: L1Params,
     bind_version: Ver,
     is_da: bool,
+    identity: Identity,
 ) -> anyhow::Result<SequencerContext<network::Production, P::Persistence, Ver>> {
     // Expose git information via status API.
     metrics
@@ -140,6 +146,49 @@ pub async fn init_node(
         env!("VERGEN_GIT_COMMIT_TIMESTAMP").into(),
     ]);
 
+    // Expose Node Entity Information via the status/metrics API
+    metrics
+        .text_family(
+            "node_identity_general".into(),
+            vec![
+                "name".into(),
+                "company_name".into(),
+                "company_website".into(),
+                "operating_system".into(),
+                "node_type".into(),
+                "network_type".into(),
+            ],
+        )
+        .create(vec![
+            identity.node_name.unwrap_or("".into()),
+            identity.company_name.unwrap_or("".into()),
+            identity
+                .company_website
+                .map(|u| u.into())
+                .unwrap_or("".into()),
+            identity.operating_system.unwrap_or("".into()),
+            identity.node_type.unwrap_or("".into()),
+            identity.network_type.unwrap_or("".into()),
+        ]);
+
+    // Expose Node Identity Location via the status/metrics API
+    metrics
+        .text_family(
+            "node_identity_location".into(),
+            vec!["country".into(), "latitude".into(), "longitude".into()],
+        )
+        .create(vec![
+            identity.country_code.unwrap_or("".into()),
+            identity
+                .latitude
+                .map(|l| l.to_string())
+                .unwrap_or("".into()),
+            identity
+                .longitude
+                .map(|l| l.to_string())
+                .unwrap_or("".into()),
+        ]);
+
     // Stick our public key in `metrics` so it is easily accessible via the status API.
     let pub_key = BLSPubKey::from_private(&network_params.private_staking_key);
     metrics
@@ -354,6 +403,7 @@ pub async fn init_node(
         Some(network_params.state_relay_server_url),
         metrics,
         genesis.stake_table.capacity,
+        network_params.public_api_url,
         bind_version,
     )
     .await?;
@@ -701,6 +751,7 @@ pub mod testing {
             self.state_relay_url.clone(),
             metrics,
             stake_table_capacity,
+            None, // The public API URL
             bind_version,
         )
         .await
diff --git a/sequencer/src/main.rs b/sequencer/src/main.rs
index 757e104ed..ab5588337 100644
--- a/sequencer/src/main.rs
+++ b/sequencer/src/main.rs
@@ -89,6 +89,7 @@ where
         libp2p_bootstrap_nodes: opt.libp2p_bootstrap_nodes,
         orchestrator_url: opt.orchestrator_url,
         state_relay_server_url: opt.state_relay_server_url,
+        public_api_url: opt.public_api_url,
         private_staking_key,
         private_state_key,
         state_peers: opt.state_peers,
@@ -140,6 +141,7 @@ where
             l1_params,
             bind_version,
             opt.is_da,
+            opt.identity,
         )
         .await
         .unwrap()
@@ -159,6 +161,7 @@ where
             l1_params,
             bind_version,
             opt.is_da,
+            opt.identity,
         )
         .await?
 }
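Every `Identity` field is optional, and the metrics code in `init_node` above exports missing values as empty strings rather than dropping labels, so `node_identity_general` and `node_identity_location` always carry the full label set. A small sketch of that defaulting rule (plain Rust, no metrics backend required; the values are hypothetical):

    fn main() {
        // Hypothetical identity inputs, as they would arrive from CLI/env flags.
        let latitude: Option<f64> = None;
        let country_code: Option<String> = Some("US".into());

        // Mirrors the `map(...).unwrap_or("".into())` chains above: an absent
        // field becomes an empty string instead of being omitted.
        let latitude_label = latitude.map(|l| l.to_string()).unwrap_or("".into());
        let country_label = country_code.unwrap_or("".into());

        assert_eq!(latitude_label, "");
        assert_eq!(country_label, "US");
    }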
diff --git a/sequencer/src/options.rs b/sequencer/src/options.rs
index 21c3316cc..5e367c201 100644
--- a/sequencer/src/options.rs
+++ b/sequencer/src/options.rs
@@ -76,6 +76,11 @@ pub struct Options {
     )]
     pub libp2p_bind_address: String,
 
+    /// The URL we advertise to other nodes as being for our public API.
+    /// Should be supplied in `http://host:port` form.
+    #[clap(long, env = "ESPRESSO_SEQUENCER_PUBLIC_API_URL")]
+    pub public_api_url: Option<Url>,
+
     /// The address we advertise to other nodes as being a Libp2p endpoint.
     /// Should be supplied in `host:port` form.
     #[clap(
@@ -205,6 +210,9 @@ pub struct Options {
 
     #[clap(flatten)]
     pub logging: logging::Config,
+
+    #[clap(flatten)]
+    pub identity: Identity,
 }
 
 impl Options {
@@ -235,6 +243,41 @@ impl Options {
     }
 }
 
+/// Identity represents identifying information concerning the sequencer node.
+/// This information is used to populate relevant information in the metrics
+/// endpoint. This information will also potentially be scraped and displayed
+/// in a public facing dashboard.
+#[derive(Parser, Clone, Derivative)]
+#[derivative(Debug(bound = ""))]
+pub struct Identity {
+    #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE")]
+    pub country_code: Option<String>,
+    #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_LATITUDE")]
+    pub latitude: Option<f64>,
+    #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE")]
+    pub longitude: Option<f64>,
+
+    #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME")]
+    pub node_name: Option<String>,
+
+    #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME")]
+    pub company_name: Option<String>,
+    #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_COMPANY_WEBSITE")]
+    pub company_website: Option<Url>,
+    #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM", default_value = std::env::consts::OS)]
+    pub operating_system: Option<String>,
+    #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_NODE_TYPE", default_value = get_default_node_type())]
+    pub node_type: Option<String>,
+    #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE")]
+    pub network_type: Option<String>,
+}
+
+/// get_default_node_type returns the current public facing binary name and
+/// version of this program.
+fn get_default_node_type() -> String {
+    format!("espresso-sequencer {}", env!("CARGO_PKG_VERSION"))
+}
+
 // The Debug implementation for Url is noisy, we just want to see the URL
 fn fmt_urls(v: &[Url], fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
     write!(