From 5db2dc7917cbe84aae676e6e5456c0ea306dc056 Mon Sep 17 00:00:00 2001 From: Rob Date: Mon, 22 Jul 2024 11:32:39 -0400 Subject: [PATCH 01/72] sequencer `rollcall` --- Cargo.lock | 22 +++++++------- Cargo.toml | 11 +++++++ sequencer/src/context.rs | 28 ++++++++++++++++-- sequencer/src/lib.rs | 1 + sequencer/src/roll_call.rs | 60 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 108 insertions(+), 14 deletions(-) create mode 100644 sequencer/src/roll_call.rs diff --git a/Cargo.lock b/Cargo.lock index 186132527..70f9636aa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4152,7 +4152,7 @@ dependencies = [ [[package]] name = "hotshot" version = "0.5.60" -source = "git+https://github.com/EspressoSystems/hotshot?tag=rc-0.5.61#b7f5fa34f22df7af6fcfde92caa1b2153102d1df" +source = "git+https://www.github.com/EspressoSystems/hotshot?branch=rm/external-message-test#bbb3585dea770f0fcdbfdc7d73dd52db5d74ba3c" dependencies = [ "anyhow", "async-broadcast", @@ -4198,7 +4198,7 @@ dependencies = [ [[package]] name = "hotshot-builder-api" version = "0.1.7" -source = "git+https://github.com/EspressoSystems/hotshot?tag=rc-0.5.61#b7f5fa34f22df7af6fcfde92caa1b2153102d1df" +source = "git+https://www.github.com/EspressoSystems/hotshot?branch=rm/external-message-test#bbb3585dea770f0fcdbfdc7d73dd52db5d74ba3c" dependencies = [ "async-trait", "clap", @@ -4295,7 +4295,7 @@ dependencies = [ [[package]] name = "hotshot-example-types" version = "0.5.60" -source = "git+https://github.com/EspressoSystems/hotshot?tag=rc-0.5.61#b7f5fa34f22df7af6fcfde92caa1b2153102d1df" +source = "git+https://www.github.com/EspressoSystems/hotshot?branch=rm/external-message-test#bbb3585dea770f0fcdbfdc7d73dd52db5d74ba3c" dependencies = [ "anyhow", "async-broadcast", @@ -4327,7 +4327,7 @@ dependencies = [ [[package]] name = "hotshot-macros" version = "0.5.60" -source = "git+https://github.com/EspressoSystems/hotshot?tag=rc-0.5.61#b7f5fa34f22df7af6fcfde92caa1b2153102d1df" +source = "git+https://www.github.com/EspressoSystems/hotshot?branch=rm/external-message-test#bbb3585dea770f0fcdbfdc7d73dd52db5d74ba3c" dependencies = [ "derive_builder", "proc-macro2", @@ -4338,7 +4338,7 @@ dependencies = [ [[package]] name = "hotshot-orchestrator" version = "0.5.60" -source = "git+https://github.com/EspressoSystems/hotshot?tag=rc-0.5.61#b7f5fa34f22df7af6fcfde92caa1b2153102d1df" +source = "git+https://www.github.com/EspressoSystems/hotshot?branch=rm/external-message-test#bbb3585dea770f0fcdbfdc7d73dd52db5d74ba3c" dependencies = [ "anyhow", "async-compatibility-layer", @@ -4424,7 +4424,7 @@ dependencies = [ [[package]] name = "hotshot-stake-table" version = "0.5.60" -source = "git+https://github.com/EspressoSystems/hotshot?tag=rc-0.5.61#b7f5fa34f22df7af6fcfde92caa1b2153102d1df" +source = "git+https://www.github.com/EspressoSystems/hotshot?branch=rm/external-message-test#bbb3585dea770f0fcdbfdc7d73dd52db5d74ba3c" dependencies = [ "ark-bn254", "ark-ed-on-bn254", @@ -4496,7 +4496,7 @@ dependencies = [ [[package]] name = "hotshot-task" version = "0.5.60" -source = "git+https://github.com/EspressoSystems/hotshot?tag=rc-0.5.61#b7f5fa34f22df7af6fcfde92caa1b2153102d1df" +source = "git+https://www.github.com/EspressoSystems/hotshot?branch=rm/external-message-test#bbb3585dea770f0fcdbfdc7d73dd52db5d74ba3c" dependencies = [ "anyhow", "async-broadcast", @@ -4511,7 +4511,7 @@ dependencies = [ [[package]] name = "hotshot-task-impls" version = "0.5.60" -source = "git+https://github.com/EspressoSystems/hotshot?tag=rc-0.5.61#b7f5fa34f22df7af6fcfde92caa1b2153102d1df" +source = 
"git+https://www.github.com/EspressoSystems/hotshot?branch=rm/external-message-test#bbb3585dea770f0fcdbfdc7d73dd52db5d74ba3c" dependencies = [ "anyhow", "async-broadcast", @@ -4547,7 +4547,7 @@ dependencies = [ [[package]] name = "hotshot-testing" version = "0.5.60" -source = "git+https://github.com/EspressoSystems/hotshot?tag=rc-0.5.61#b7f5fa34f22df7af6fcfde92caa1b2153102d1df" +source = "git+https://www.github.com/EspressoSystems/hotshot?branch=rm/external-message-test#bbb3585dea770f0fcdbfdc7d73dd52db5d74ba3c" dependencies = [ "anyhow", "async-broadcast", @@ -4589,7 +4589,7 @@ dependencies = [ [[package]] name = "hotshot-types" version = "0.1.11" -source = "git+https://github.com/EspressoSystems/hotshot?tag=rc-0.5.61#b7f5fa34f22df7af6fcfde92caa1b2153102d1df" +source = "git+https://www.github.com/EspressoSystems/hotshot?branch=rm/external-message-test#bbb3585dea770f0fcdbfdc7d73dd52db5d74ba3c" dependencies = [ "anyhow", "ark-bls12-381", @@ -5870,7 +5870,7 @@ dependencies = [ [[package]] name = "libp2p-networking" version = "0.5.60" -source = "git+https://github.com/EspressoSystems/hotshot?tag=rc-0.5.61#b7f5fa34f22df7af6fcfde92caa1b2153102d1df" +source = "git+https://www.github.com/EspressoSystems/hotshot?branch=rm/external-message-test#bbb3585dea770f0fcdbfdc7d73dd52db5d74ba3c" dependencies = [ "anyhow", "async-compatibility-layer", diff --git a/Cargo.toml b/Cargo.toml index 2faef8782..b8feb8bd5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -131,3 +131,14 @@ paste = "1.0" rand = "0.8.5" time = "0.3" trait-set = "0.3.0" + + +[patch."https://github.com/EspressoSystems/HotShot.git"] +hotshot = { git = "https://www.github.com/EspressoSystems/hotshot", branch = "rm/external-message-test" } +hotshot-builder-api = { git = "https://www.github.com/EspressoSystems/hotshot", branch = "rm/external-message-test" } +hotshot-orchestrator = { git = "https://www.github.com/EspressoSystems/hotshot", branch = "rm/external-message-test" } +hotshot-stake-table = { git = "https://www.github.com/EspressoSystems/hotshot", branch = "rm/external-message-test" } +hotshot-task = { git = "https://www.github.com/EspressoSystems/hotshot", branch = "rm/external-message-test" } +hotshot-testing = { git = "https://www.github.com/EspressoSystems/hotshot", branch = "rm/external-message-test" } +hotshot-example-types = { git = "https://www.github.com/EspressoSystems/hotshot", branch = "rm/external-message-test" } +hotshot-types = { git = "https://www.github.com/EspressoSystems/hotshot", branch = "rm/external-message-test" } diff --git a/sequencer/src/context.rs b/sequencer/src/context.rs index 8efc6db62..a6b803747 100644 --- a/sequencer/src/context.rs +++ b/sequencer/src/context.rs @@ -30,7 +30,10 @@ use hotshot_types::{ use url::Url; use vbs::version::StaticVersionType; -use crate::{state_signature::StateSigner, static_stake_table_commitment, Node, SeqTypes}; +use crate::{ + roll_call::RollCall, state_signature::StateSigner, static_stake_table_commitment, Node, + SeqTypes, +}; /// The consensus handle pub type Consensus = SystemContextHandle>; @@ -49,6 +52,10 @@ pub struct SequencerContext< /// Context for generating state signatures. state_signer: Arc>, + /// Roll call for external messages + #[derivative(Debug = "ignore")] + roll_call: Arc>, + /// An orchestrator to wait for before starting consensus. 
#[derivative(Debug = "ignore")] wait_for_orchestrator: Option>, @@ -135,7 +142,7 @@ impl, P: SequencerPersistence, Ver: StaticVersionTyp instance_state.node_id, config.clone(), memberships, - network, + network.clone(), initializer, ConsensusMetricsValue::new(metrics), persistence.clone(), @@ -149,10 +156,16 @@ impl, P: SequencerPersistence, Ver: StaticVersionTyp state_signer = state_signer.with_relay_server(url); } + let roll_call = RollCall { + network, + identifier: instance_state.node_id.to_string(), + }; + Ok(Self::new( handle, persistence, state_signer, + roll_call, event_streamer, instance_state, network_config, @@ -164,6 +177,7 @@ impl, P: SequencerPersistence, Ver: StaticVersionTyp handle: Consensus, persistence: Arc>, state_signer: StateSigner, + roll_call: RollCall, event_streamer: Arc>>, node_state: NodeState, config: NetworkConfig, @@ -173,6 +187,7 @@ impl, P: SequencerPersistence, Ver: StaticVersionTyp let mut ctx = Self { handle: Arc::new(RwLock::new(handle)), state_signer: Arc::new(state_signer), + roll_call: Arc::new(roll_call), tasks: Default::default(), detached: false, wait_for_orchestrator: None, @@ -186,6 +201,7 @@ impl, P: SequencerPersistence, Ver: StaticVersionTyp events, persistence, ctx.state_signer.clone(), + ctx.roll_call.clone(), Some(event_streamer.clone()), ), ); @@ -308,10 +324,11 @@ impl, P: SequencerPersistence, Ver: StaticVersionTyp } } -async fn handle_events( +async fn handle_events>( mut events: impl Stream> + Unpin, persistence: Arc>, state_signer: Arc>, + roll_call: Arc>, events_streamer: Option>>>, ) { while let Some(event) = events.next().await { @@ -325,6 +342,11 @@ async fn handle_events( // Generate state signature. state_signer.handle_event(&event).await; + // Handle the external message (maybe we name this "external handler" or something) + if let Err(e) = roll_call.handle_event(&event).await { + tracing::warn!(error = ?e, "Failed to handle external message"); + }; + // Send the event via the event streaming service if let Some(events_streamer) = events_streamer.as_ref() { events_streamer.write().await.handle_event(event).await; diff --git a/sequencer/src/lib.rs b/sequencer/src/lib.rs index a837b7996..fc566a9c2 100644 --- a/sequencer/src/lib.rs +++ b/sequencer/src/lib.rs @@ -6,6 +6,7 @@ pub mod genesis; pub mod hotshot_commitment; pub mod options; pub mod state_signature; +mod roll_call; mod message_compat_tests; diff --git a/sequencer/src/roll_call.rs b/sequencer/src/roll_call.rs new file mode 100644 index 000000000..c6df6f362 --- /dev/null +++ b/sequencer/src/roll_call.rs @@ -0,0 +1,60 @@ +//! 
Should probably rename this to "external" or something + +use std::sync::Arc; + +use anyhow::{Context, Result}; +use espresso_types::{PubKey, SeqTypes}; +use hotshot::types::{BLSPubKey, Event, EventType}; +use hotshot_types::traits::network::ConnectedNetwork; +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize)] +enum ExternalMessage { + /// A request to roll call + /// Has the public key of whatever is asking for the roll call (so it can get a response) + RollCallRequest(BLSPubKey), + + /// A response to a roll call + /// Has the identifier of the node that is responding + RollCallResponse(String), +} + +pub struct RollCall> { + // The network to respond over + pub network: Arc, + + // My node's public identifier + pub identifier: String, +} + +impl> RollCall { + pub async fn handle_event(&self, event: &Event) -> Result<()> { + // Check if the event is an external message + if let EventType::ExternalMessageReceived(external_message_bytes) = &event.event { + // Deserialize the external message + let external_message = bincode::deserialize(external_message_bytes) + .with_context(|| "Failed to deserialize external message")?; + + // Match the type + match external_message { + ExternalMessage::RollCallRequest(pub_key) => { + // If it's a roll call request, send our identifier + let response = ExternalMessage::RollCallResponse(self.identifier.clone()); + + // Serialize the response + let response_bytes = bincode::serialize(&response) + .with_context(|| "Failed to serialize roll call response")?; + + // Send the response + self.network.direct_message(response_bytes, pub_key).await?; + } + + _ => { + return Err(anyhow::anyhow!("Unknown external message type")); + } + } + }; + + Ok(()) + } +} From a599c5d63e1ec2e783430e6951a962b80548f853 Mon Sep 17 00:00:00 2001 From: Rob Date: Mon, 22 Jul 2024 14:00:59 -0400 Subject: [PATCH 02/72] external message handler --- Cargo.lock | 4 +- Cargo.toml | 2 +- builder/src/bin/permissioned-builder.rs | 1 + sequencer/src/context.rs | 41 +++---- sequencer/src/external_event_handler.rs | 149 ++++++++++++++++++++++++ sequencer/src/lib.rs | 6 +- sequencer/src/main.rs | 1 + sequencer/src/options.rs | 5 + sequencer/src/roll_call.rs | 60 ---------- 9 files changed, 185 insertions(+), 84 deletions(-) create mode 100644 sequencer/src/external_event_handler.rs delete mode 100644 sequencer/src/roll_call.rs diff --git a/Cargo.lock b/Cargo.lock index 70f9636aa..0bce41921 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -627,9 +627,9 @@ dependencies = [ [[package]] name = "async-compatibility-layer" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c928880329566b45c159fca61fbc2a1d301a7e5fd2a0e94c17476bb1a3ea526" +checksum = "32dd1dfd4a05a197583e51036d9615f04a4d851089dc119ee965d440d0bcaa39" dependencies = [ "async-lock 3.4.0", "async-std", diff --git a/Cargo.toml b/Cargo.toml index b8feb8bd5..9c611c019 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,7 +30,7 @@ ark-ff = "0.4" ark-poly = "0.4" ark-serialize = "0.4" ark-srs = "0.3.1" -async-compatibility-layer = { version = "1.1", default-features = false, features = [ +async-compatibility-layer = { version = "1.2.1", default-features = false, features = [ "logging-utils", ] } async-once-cell = "0.5" diff --git a/builder/src/bin/permissioned-builder.rs b/builder/src/bin/permissioned-builder.rs index b76d02b24..cc9770d37 100644 --- a/builder/src/bin/permissioned-builder.rs +++ b/builder/src/bin/permissioned-builder.rs @@ -255,6 +255,7 @@ async fn 
main() -> anyhow::Result<()> { private_staking_key: private_staking_key.clone(), private_state_key, state_peers: opt.state_peers, + public_api_url: None, config_peers: None, catchup_backoff: Default::default(), }; diff --git a/sequencer/src/context.rs b/sequencer/src/context.rs index a6b803747..dee4fc5cc 100644 --- a/sequencer/src/context.rs +++ b/sequencer/src/context.rs @@ -15,7 +15,7 @@ use futures::{ }; use hotshot::{ traits::election::static_committee::GeneralStaticCommittee, - types::{Event, SystemContextHandle}, + types::{Event, EventType, SystemContextHandle}, Memberships, SystemContext, }; use hotshot_events_service::events_source::{EventConsumer, EventsStreamer}; @@ -31,8 +31,9 @@ use url::Url; use vbs::version::StaticVersionType; use crate::{ - roll_call::RollCall, state_signature::StateSigner, static_stake_table_commitment, Node, - SeqTypes, + external_event_handler::{self, ExternalEventHandler}, + state_signature::StateSigner, + static_stake_table_commitment, Node, SeqTypes, }; /// The consensus handle pub type Consensus = SystemContextHandle>; @@ -52,10 +53,6 @@ pub struct SequencerContext< /// Context for generating state signatures. state_signer: Arc>, - /// Roll call for external messages - #[derivative(Debug = "ignore")] - roll_call: Arc>, - /// An orchestrator to wait for before starting consensus. #[derivative(Debug = "ignore")] wait_for_orchestrator: Option>, @@ -86,6 +83,7 @@ impl, P: SequencerPersistence, Ver: StaticVersionTyp state_relay_server: Option, metrics: &dyn Metrics, stake_table_capacity: u64, + public_api_url: Option, _: Ver, ) -> anyhow::Result { let config = &network_config.config; @@ -156,16 +154,18 @@ impl, P: SequencerPersistence, Ver: StaticVersionTyp state_signer = state_signer.with_relay_server(url); } - let roll_call = RollCall { - network, - identifier: instance_state.node_id.to_string(), - }; + // Create the roll call info we will be using + let roll_call_info = external_event_handler::RollCallInfo { public_api_url }; + + // Create the external event handler + let external_event_handler = ExternalEventHandler::new(network, roll_call_info) + .with_context(|| "Failed to create external event handler")?; Ok(Self::new( handle, persistence, state_signer, - roll_call, + external_event_handler, event_streamer, instance_state, network_config, @@ -177,7 +177,7 @@ impl, P: SequencerPersistence, Ver: StaticVersionTyp handle: Consensus, persistence: Arc>, state_signer: StateSigner, - roll_call: RollCall, + external_event_handler: ExternalEventHandler, event_streamer: Arc>>, node_state: NodeState, config: NetworkConfig, @@ -187,7 +187,6 @@ impl, P: SequencerPersistence, Ver: StaticVersionTyp let mut ctx = Self { handle: Arc::new(RwLock::new(handle)), state_signer: Arc::new(state_signer), - roll_call: Arc::new(roll_call), tasks: Default::default(), detached: false, wait_for_orchestrator: None, @@ -201,7 +200,7 @@ impl, P: SequencerPersistence, Ver: StaticVersionTyp events, persistence, ctx.state_signer.clone(), - ctx.roll_call.clone(), + external_event_handler, Some(event_streamer.clone()), ), ); @@ -328,7 +327,7 @@ async fn handle_events>( mut events: impl Stream> + Unpin, persistence: Arc>, state_signer: Arc>, - roll_call: Arc>, + external_event_handler: ExternalEventHandler, events_streamer: Option>>>, ) { while let Some(event) = events.next().await { @@ -342,10 +341,12 @@ async fn handle_events>( // Generate state signature. 
state_signer.handle_event(&event).await; - // Handle the external message (maybe we name this "external handler" or something) - if let Err(e) = roll_call.handle_event(&event).await { - tracing::warn!(error = ?e, "Failed to handle external message"); - }; + // Handle external messages + if let EventType::ExternalMessageReceived(external_message_bytes) = &event.event { + if let Err(err) = external_event_handler.handle_event(external_message_bytes) { + tracing::warn!("Failed to handle external message: {:?}", err); + }; + } // Send the event via the event streaming service if let Some(events_streamer) = events_streamer.as_ref() { diff --git a/sequencer/src/external_event_handler.rs b/sequencer/src/external_event_handler.rs new file mode 100644 index 000000000..046915bdf --- /dev/null +++ b/sequencer/src/external_event_handler.rs @@ -0,0 +1,149 @@ +//! Should probably rename this to "external" or something + +use std::{collections::BTreeSet, sync::Arc}; + +use anyhow::{Context, Result}; +use async_compatibility_layer::channel::{Receiver, Sender}; +use async_std::task::{self, JoinHandle}; +use espresso_types::PubKey; +use hotshot::types::BLSPubKey; +use hotshot_types::traits::network::{BroadcastDelay, ConnectedNetwork}; +use serde::{Deserialize, Serialize}; +use url::Url; + +/// An external message that can be sent to or received from a node +#[derive(Serialize, Deserialize, Clone)] +pub enum ExternalMessage { + /// A request for a node to respond with its identifier + /// Contains the public key of the node that is requesting the roll call + RollCallRequest(BLSPubKey), + + /// A response to a roll call request + /// Contains the identifier of the node + RollCallResponse(RollCallInfo), +} + +/// Information about a node that is used in a roll call response +#[derive(Serialize, Deserialize, Clone)] +pub struct RollCallInfo { + // The public API URL of the node + pub public_api_url: Option, +} + +/// The external event handler state +pub struct ExternalEventHandler> { + // The network to respond over + pub network: Arc, + + // The `RollCallInfo` of the node (used in the roll call response) + pub roll_call_info: RollCallInfo, + + // The tasks that are running + pub tasks: Vec>, + + // The outbound message queue + pub outbound_message_sender: Sender, +} + +// The different types of outbound messages (broadcast or direct) +pub enum OutboundMessage { + Direct(Vec, PubKey), + Broadcast(Vec), +} + +impl> ExternalEventHandler { + /// Creates a new `ExternalEventHandler` with the given network and roll call info + pub fn new(network: Arc, roll_call_info: RollCallInfo) -> Result { + // Create the outbound message queue + let (outbound_message_sender, outbound_message_receiver) = + async_compatibility_layer::channel::bounded(50); + + // Spawn the outbound message handling loop + let outbound_message_loop = async_std::task::spawn(Self::outbound_message_loop( + outbound_message_receiver, + network.clone(), + )); + + // We just started, so queue an outbound RollCall message + let roll_call_message = ExternalMessage::RollCallResponse(roll_call_info.clone()); + let roll_call_message_bytes = bincode::serialize(&roll_call_message) + .with_context(|| "Failed to serialize roll call message for initial broadcast")?; + outbound_message_sender + .try_send(OutboundMessage::Broadcast(roll_call_message_bytes)) + .with_context(|| "External outbound message queue is somehow full")?; + + Ok(Self { + network, + roll_call_info, + tasks: vec![outbound_message_loop], + outbound_message_sender, + }) + } + + /// Handles an event 
+ /// + /// # Errors + /// If the message type is unknown or if there is an error serializing or deserializing the message + pub fn handle_event(&self, external_message_bytes: &[u8]) -> Result<()> { + // Deserialize the external message + let external_message = bincode::deserialize(external_message_bytes) + .with_context(|| "Failed to deserialize external message")?; + + // Match the type + match external_message { + ExternalMessage::RollCallRequest(pub_key) => { + // If it's a roll call request, send our information + let response = ExternalMessage::RollCallResponse(self.roll_call_info.clone()); + + // Serialize the response + let response_bytes = bincode::serialize(&response) + .with_context(|| "Failed to serialize roll call response")?; + + // Send the response + self.outbound_message_sender + .try_send(OutboundMessage::Direct(response_bytes, pub_key)) + .with_context(|| "External outbound message queue is full")?; + } + + _ => { + return Err(anyhow::anyhow!("Unknown external message type")); + } + } + Ok(()) + } + + /// The main loop for sending outbound messages. + /// This is a queue so that we don't block the main event loop when sending messages. + async fn outbound_message_loop(mut receiver: Receiver, network: Arc) { + while let Ok(message) = receiver.recv().await { + // Match the message type + match message { + OutboundMessage::Direct(message, recipient) => { + // Send the message directly to the recipient + if let Err(err) = network.direct_message(message, recipient).await { + tracing::error!("Failed to send message: {:?}", err); + }; + } + + OutboundMessage::Broadcast(message) => { + // Broadcast the message to the global topic + if let Err(err) = network + .broadcast_message(message, BTreeSet::new(), BroadcastDelay::None) + .await + { + tracing::error!("Failed to broadcast message: {:?}", err); + }; + } + } + } + } +} + +impl> Drop for ExternalEventHandler { + fn drop(&mut self) { + // Cancel all tasks + for task in self.tasks.drain(..) 
{ + task::block_on(task.cancel()); + } + } +} diff --git a/sequencer/src/lib.rs b/sequencer/src/lib.rs index fc566a9c2..aa465062e 100644 --- a/sequencer/src/lib.rs +++ b/sequencer/src/lib.rs @@ -3,10 +3,10 @@ pub mod catchup; pub mod context; pub mod genesis; +mod external_event_handler; pub mod hotshot_commitment; pub mod options; pub mod state_signature; -mod roll_call; mod message_compat_tests; @@ -105,6 +105,8 @@ pub struct NetworkParams { pub state_peers: Vec, pub config_peers: Option>, pub catchup_backoff: BackoffParams, + /// The address to advertise as our public API's URL + pub public_api_url: Option, /// The address to send to other Libp2p nodes to contact us pub libp2p_advertise_address: SocketAddr, @@ -355,6 +357,7 @@ pub async fn init_node( Some(network_params.state_relay_server_url), metrics, genesis.stake_table.capacity, + network_params.public_api_url, bind_version, ) .await?; @@ -691,6 +694,7 @@ pub mod testing { self.state_relay_url.clone(), metrics, stake_table_capacity, + None, // The public API URL bind_version, ) .await diff --git a/sequencer/src/main.rs b/sequencer/src/main.rs index 1142e5c58..11b0a1b2a 100644 --- a/sequencer/src/main.rs +++ b/sequencer/src/main.rs @@ -80,6 +80,7 @@ where libp2p_bootstrap_nodes: opt.libp2p_bootstrap_nodes, orchestrator_url: opt.orchestrator_url, state_relay_server_url: opt.state_relay_server_url, + public_api_url: opt.public_api_url, private_staking_key, private_state_key, state_peers: opt.state_peers, diff --git a/sequencer/src/options.rs b/sequencer/src/options.rs index d8384bacb..adbeb9e96 100644 --- a/sequencer/src/options.rs +++ b/sequencer/src/options.rs @@ -75,6 +75,11 @@ pub struct Options { )] pub libp2p_bind_address: String, + /// The URL we advertise to other nodes as being for our public API. + /// Should be supplied in `http://host:port` form. + #[clap(long, env = "ESPRESSO_SEQUENCER_PUBLIC_API_URL")] + pub public_api_url: Option, + /// The address we advertise to other nodes as being a Libp2p endpoint. /// Should be supplied in `host:port` form. #[clap( diff --git a/sequencer/src/roll_call.rs b/sequencer/src/roll_call.rs deleted file mode 100644 index c6df6f362..000000000 --- a/sequencer/src/roll_call.rs +++ /dev/null @@ -1,60 +0,0 @@ -//! 
Should probably rename this to "external" or something - -use std::sync::Arc; - -use anyhow::{Context, Result}; -use espresso_types::{PubKey, SeqTypes}; -use hotshot::types::{BLSPubKey, Event, EventType}; -use hotshot_types::traits::network::ConnectedNetwork; -use serde::{Deserialize, Serialize}; - -#[derive(Serialize, Deserialize)] -enum ExternalMessage { - /// A request to roll call - /// Has the public key of whatever is asking for the roll call (so it can get a response) - RollCallRequest(BLSPubKey), - - /// A response to a roll call - /// Has the identifier of the node that is responding - RollCallResponse(String), -} - -pub struct RollCall> { - // The network to respond over - pub network: Arc, - - // My node's public identifier - pub identifier: String, -} - -impl> RollCall { - pub async fn handle_event(&self, event: &Event) -> Result<()> { - // Check if the event is an external message - if let EventType::ExternalMessageReceived(external_message_bytes) = &event.event { - // Deserialize the external message - let external_message = bincode::deserialize(external_message_bytes) - .with_context(|| "Failed to deserialize external message")?; - - // Match the type - match external_message { - ExternalMessage::RollCallRequest(pub_key) => { - // If it's a roll call request, send our identifier - let response = ExternalMessage::RollCallResponse(self.identifier.clone()); - - // Serialize the response - let response_bytes = bincode::serialize(&response) - .with_context(|| "Failed to serialize roll call response")?; - - // Send the response - self.network.direct_message(response_bytes, pub_key).await?; - } - - _ => { - return Err(anyhow::anyhow!("Unknown external message type")); - } - } - }; - - Ok(()) - } -} From 8497354f77e8bd4ed120f784cbaceec926efcc3f Mon Sep 17 00:00:00 2001 From: Rob Date: Mon, 22 Jul 2024 15:13:41 -0400 Subject: [PATCH 03/72] address lints --- sequencer/src/context.rs | 6 +++--- sequencer/src/external_event_handler.rs | 22 ++++++++++++---------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/sequencer/src/context.rs b/sequencer/src/context.rs index dee4fc5cc..42f25ff87 100644 --- a/sequencer/src/context.rs +++ b/sequencer/src/context.rs @@ -177,7 +177,7 @@ impl, P: SequencerPersistence, Ver: StaticVersionTyp handle: Consensus, persistence: Arc>, state_signer: StateSigner, - external_event_handler: ExternalEventHandler, + external_event_handler: ExternalEventHandler, event_streamer: Arc>>, node_state: NodeState, config: NetworkConfig, @@ -323,11 +323,11 @@ impl, P: SequencerPersistence, Ver: StaticVersionTyp } } -async fn handle_events>( +async fn handle_events( mut events: impl Stream> + Unpin, persistence: Arc>, state_signer: Arc>, - external_event_handler: ExternalEventHandler, + external_event_handler: ExternalEventHandler, events_streamer: Option>>>, ) { while let Some(event) = events.next().await { diff --git a/sequencer/src/external_event_handler.rs b/sequencer/src/external_event_handler.rs index 046915bdf..f7b86c96d 100644 --- a/sequencer/src/external_event_handler.rs +++ b/sequencer/src/external_event_handler.rs @@ -31,10 +31,7 @@ pub struct RollCallInfo { } /// The external event handler state -pub struct ExternalEventHandler> { - // The network to respond over - pub network: Arc, - +pub struct ExternalEventHandler { // The `RollCallInfo` of the node (used in the roll call response) pub roll_call_info: RollCallInfo, @@ -51,9 +48,12 @@ pub enum OutboundMessage { Broadcast(Vec), } -impl> ExternalEventHandler { +impl ExternalEventHandler { /// 
Creates a new `ExternalEventHandler` with the given network and roll call info - pub fn new(network: Arc, roll_call_info: RollCallInfo) -> Result { + pub fn new>( + network: Arc, + roll_call_info: RollCallInfo, + ) -> Result { // Create the outbound message queue let (outbound_message_sender, outbound_message_receiver) = async_compatibility_layer::channel::bounded(50); @@ -61,7 +61,7 @@ impl> ExternalEventHandler { // Spawn the outbound message handling loop let outbound_message_loop = async_std::task::spawn(Self::outbound_message_loop( outbound_message_receiver, - network.clone(), + network, )); // We just started, so queue an outbound RollCall message @@ -73,7 +73,6 @@ impl> ExternalEventHandler { .with_context(|| "External outbound message queue is somehow full")?; Ok(Self { - network, roll_call_info, tasks: vec![outbound_message_loop], outbound_message_sender, @@ -114,7 +113,10 @@ impl> ExternalEventHandler { /// The main loop for sending outbound messages. /// This is a queue so that we don't block the main event loop when sending messages. - async fn outbound_message_loop(mut receiver: Receiver, network: Arc) { + async fn outbound_message_loop>( + mut receiver: Receiver, + network: Arc, + ) { while let Ok(message) = receiver.recv().await { // Match the message type match message { @@ -139,7 +141,7 @@ impl> ExternalEventHandler { } } -impl> Drop for ExternalEventHandler { +impl Drop for ExternalEventHandler { fn drop(&mut self) { // Cancel all tasks for task in self.tasks.drain(..) { From 01ba5e92ea4a9ae575b1ed3690e7877ad3c13ccf Mon Sep 17 00:00:00 2001 From: Rob Date: Mon, 22 Jul 2024 16:39:45 -0400 Subject: [PATCH 04/72] don't respond if we didn't specify a URL --- sequencer/src/external_event_handler.rs | 26 +++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/sequencer/src/external_event_handler.rs b/sequencer/src/external_event_handler.rs index f7b86c96d..64b494ac9 100644 --- a/sequencer/src/external_event_handler.rs +++ b/sequencer/src/external_event_handler.rs @@ -56,7 +56,7 @@ impl ExternalEventHandler { ) -> Result { // Create the outbound message queue let (outbound_message_sender, outbound_message_receiver) = - async_compatibility_layer::channel::bounded(50); + async_compatibility_layer::channel::bounded(10); // Spawn the outbound message handling loop let outbound_message_loop = async_std::task::spawn(Self::outbound_message_loop( @@ -64,13 +64,15 @@ impl ExternalEventHandler { network, )); - // We just started, so queue an outbound RollCall message - let roll_call_message = ExternalMessage::RollCallResponse(roll_call_info.clone()); - let roll_call_message_bytes = bincode::serialize(&roll_call_message) - .with_context(|| "Failed to serialize roll call message for initial broadcast")?; - outbound_message_sender - .try_send(OutboundMessage::Broadcast(roll_call_message_bytes)) - .with_context(|| "External outbound message queue is somehow full")?; + // We just started, so queue an outbound RollCall message (if we have a public API URL) + if roll_call_info.public_api_url.is_some() { + let roll_call_message = ExternalMessage::RollCallResponse(roll_call_info.clone()); + let roll_call_message_bytes = bincode::serialize(&roll_call_message) + .with_context(|| "Failed to serialize roll call message for initial broadcast")?; + outbound_message_sender + .try_send(OutboundMessage::Broadcast(roll_call_message_bytes)) + .with_context(|| "External outbound message queue is somehow full")?; + } Ok(Self { roll_call_info, @@ -91,7 +93,12 @@ impl 
ExternalEventHandler {
         // Match the type
         match external_message {
             ExternalMessage::RollCallRequest(pub_key) => {
-                // If it's a roll call request, send our information
+                if self.roll_call_info.public_api_url.is_none() {
+                    // We don't have a public API URL, so we can't respond to the roll call
+                    return Ok(());
+                }
+
+                // If it's a roll call request, send our information (if we have a public API URL)
                 let response = ExternalMessage::RollCallResponse(self.roll_call_info.clone());
 
                 // Serialize the response
@@ -112,7 +119,6 @@ impl ExternalEventHandler {
     }
 
     /// The main loop for sending outbound messages.
-    /// This is a queue so that we don't block the main event loop when sending messages.
    async fn outbound_message_loop<N: ConnectedNetwork<PubKey>>(
        mut receiver: Receiver<OutboundMessage>,
        network: Arc<N>,
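Taken together, patches 01-04 define a small request/response protocol on top of the existing consensus network: any participant can broadcast an `ExternalMessage::RollCallRequest` carrying its own public key, and every node that has a public API URL answers with a direct `ExternalMessage::RollCallResponse`. As a hedged sketch of the requesting side — the `request_roll_call` helper is illustrative and not part of these patches, though the network calls mirror the ones used in `external_event_handler.rs` above:

    use std::{collections::BTreeSet, sync::Arc};

    use anyhow::{Context, Result};
    use espresso_types::PubKey;
    use hotshot::types::BLSPubKey;
    use hotshot_types::traits::network::{BroadcastDelay, ConnectedNetwork};

    /// Illustrative helper: broadcast a roll call request so that every node
    /// with a public API URL direct-messages a RollCallResponse back to
    /// `requester_key`.
    async fn request_roll_call<N: ConnectedNetwork<PubKey>>(
        network: Arc<N>,
        requester_key: BLSPubKey,
    ) -> Result<()> {
        let request = ExternalMessage::RollCallRequest(requester_key);
        let request_bytes = bincode::serialize(&request)
            .with_context(|| "Failed to serialize roll call request")?;

        // The responses arrive asynchronously as
        // EventType::ExternalMessageReceived events on the requester's side.
        network
            .broadcast_message(request_bytes, BTreeSet::new(), BroadcastDelay::None)
            .await
            .with_context(|| "Failed to broadcast roll call request")
    }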
From fe97c143f7ef1d46fd4572e872e1a38135ddf0b4 Mon Sep 17 00:00:00 2001
From: Rob
Date: Tue, 23 Jul 2024 16:53:53 -0400
Subject: [PATCH 05/72] remove patch

---
 Cargo.lock | 22 +++++++++++-----------
 Cargo.toml | 10 ----------
 2 files changed, 11 insertions(+), 21 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 0bce41921..05c1cbdec 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4152,7 +4152,7 @@ dependencies = [
 [[package]]
 name = "hotshot"
 version = "0.5.60"
-source = "git+https://www.github.com/EspressoSystems/hotshot?branch=rm/external-message-test#bbb3585dea770f0fcdbfdc7d73dd52db5d74ba3c"
+source = "git+https://github.com/EspressoSystems/hotshot?tag=rc-0.5.61#b7f5fa34f22df7af6fcfde92caa1b2153102d1df"
 dependencies = [
  "anyhow",
  "async-broadcast",
@@ -4198,7 +4198,7 @@ dependencies = [
 [[package]]
 name = "hotshot-builder-api"
 version = "0.1.7"
-source = "git+https://www.github.com/EspressoSystems/hotshot?branch=rm/external-message-test#bbb3585dea770f0fcdbfdc7d73dd52db5d74ba3c"
+source = "git+https://github.com/EspressoSystems/hotshot?tag=rc-0.5.61#b7f5fa34f22df7af6fcfde92caa1b2153102d1df"
 dependencies = [
  "async-trait",
  "clap",
@@ -4295,7 +4295,7 @@ dependencies = [
 [[package]]
 name = "hotshot-example-types"
 version = "0.5.60"
-source = "git+https://www.github.com/EspressoSystems/hotshot?branch=rm/external-message-test#bbb3585dea770f0fcdbfdc7d73dd52db5d74ba3c"
+source = "git+https://github.com/EspressoSystems/hotshot?tag=rc-0.5.61#b7f5fa34f22df7af6fcfde92caa1b2153102d1df"
 dependencies = [
  "anyhow",
  "async-broadcast",
@@ -4327,7 +4327,7 @@ dependencies = [
 [[package]]
 name = "hotshot-macros"
 version = "0.5.60"
-source = "git+https://www.github.com/EspressoSystems/hotshot?branch=rm/external-message-test#bbb3585dea770f0fcdbfdc7d73dd52db5d74ba3c"
+source = "git+https://github.com/EspressoSystems/hotshot?tag=rc-0.5.61#b7f5fa34f22df7af6fcfde92caa1b2153102d1df"
 dependencies = [
  "derive_builder",
  "proc-macro2",
@@ -4338,7 +4338,7 @@ dependencies = [
 [[package]]
 name = "hotshot-orchestrator"
 version = "0.5.60"
-source = "git+https://www.github.com/EspressoSystems/hotshot?branch=rm/external-message-test#bbb3585dea770f0fcdbfdc7d73dd52db5d74ba3c"
+source = "git+https://github.com/EspressoSystems/hotshot?tag=rc-0.5.61#b7f5fa34f22df7af6fcfde92caa1b2153102d1df"
 dependencies = [
  "anyhow",
  "async-compatibility-layer",
@@ -4424,7 +4424,7 @@ dependencies = [
 [[package]]
 name = "hotshot-stake-table"
 version = "0.5.60"
-source = "git+https://www.github.com/EspressoSystems/hotshot?branch=rm/external-message-test#bbb3585dea770f0fcdbfdc7d73dd52db5d74ba3c"
+source = "git+https://github.com/EspressoSystems/hotshot?tag=rc-0.5.61#b7f5fa34f22df7af6fcfde92caa1b2153102d1df"
 dependencies = [
  "ark-bn254",
  "ark-ed-on-bn254",
@@ -4496,7 +4496,7 @@ dependencies = [
 [[package]]
 name = "hotshot-task"
 version = "0.5.60"
-source = "git+https://www.github.com/EspressoSystems/hotshot?branch=rm/external-message-test#bbb3585dea770f0fcdbfdc7d73dd52db5d74ba3c"
+source = "git+https://github.com/EspressoSystems/hotshot?tag=rc-0.5.61#b7f5fa34f22df7af6fcfde92caa1b2153102d1df"
 dependencies = [
  "anyhow",
  "async-broadcast",
@@ -4511,7 +4511,7 @@ dependencies = [
 [[package]]
 name = "hotshot-task-impls"
 version = "0.5.60"
-source = "git+https://www.github.com/EspressoSystems/hotshot?branch=rm/external-message-test#bbb3585dea770f0fcdbfdc7d73dd52db5d74ba3c"
+source = "git+https://github.com/EspressoSystems/hotshot?tag=rc-0.5.61#b7f5fa34f22df7af6fcfde92caa1b2153102d1df"
 dependencies = [
  "anyhow",
  "async-broadcast",
@@ -4547,7 +4547,7 @@ dependencies = [
 [[package]]
 name = "hotshot-testing"
 version = "0.5.60"
-source = "git+https://www.github.com/EspressoSystems/hotshot?branch=rm/external-message-test#bbb3585dea770f0fcdbfdc7d73dd52db5d74ba3c"
+source = "git+https://github.com/EspressoSystems/hotshot?tag=rc-0.5.61#b7f5fa34f22df7af6fcfde92caa1b2153102d1df"
 dependencies = [
  "anyhow",
  "async-broadcast",
@@ -4589,7 +4589,7 @@ dependencies = [
 [[package]]
 name = "hotshot-types"
 version = "0.1.11"
-source = "git+https://www.github.com/EspressoSystems/hotshot?branch=rm/external-message-test#bbb3585dea770f0fcdbfdc7d73dd52db5d74ba3c"
+source = "git+https://github.com/EspressoSystems/hotshot?tag=rc-0.5.61#b7f5fa34f22df7af6fcfde92caa1b2153102d1df"
 dependencies = [
  "anyhow",
  "ark-bls12-381",
@@ -5870,7 +5870,7 @@ dependencies = [
 [[package]]
 name = "libp2p-networking"
 version = "0.5.60"
-source = "git+https://www.github.com/EspressoSystems/hotshot?branch=rm/external-message-test#bbb3585dea770f0fcdbfdc7d73dd52db5d74ba3c"
+source = "git+https://github.com/EspressoSystems/hotshot?tag=rc-0.5.61#b7f5fa34f22df7af6fcfde92caa1b2153102d1df"
 dependencies = [
  "anyhow",
  "async-compatibility-layer",
diff --git a/Cargo.toml b/Cargo.toml
index 9c611c019..8fae6f258 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -132,13 +132,3 @@ rand = "0.8.5"
 time = "0.3"
 trait-set = "0.3.0"
 
-
-[patch."https://github.com/EspressoSystems/HotShot.git"]
-hotshot = { git = "https://www.github.com/EspressoSystems/hotshot", branch = "rm/external-message-test" }
-hotshot-builder-api = { git = "https://www.github.com/EspressoSystems/hotshot", branch = "rm/external-message-test" }
-hotshot-orchestrator = { git = "https://www.github.com/EspressoSystems/hotshot", branch = "rm/external-message-test" }
-hotshot-stake-table = { git = "https://www.github.com/EspressoSystems/hotshot", branch = "rm/external-message-test" }
-hotshot-task = { git = "https://www.github.com/EspressoSystems/hotshot", branch = "rm/external-message-test" }
-hotshot-testing = { git = "https://www.github.com/EspressoSystems/hotshot", branch = "rm/external-message-test" }
-hotshot-example-types = { git = "https://www.github.com/EspressoSystems/hotshot", branch = "rm/external-message-test" }
-hotshot-types = { git = "https://www.github.com/EspressoSystems/hotshot", branch = "rm/external-message-test" }

From 03340716ecb2ec583934bcbed272f59b8ae3abdb Mon Sep 17 00:00:00 2001
From: Theodore Schnepper
Date: Tue, 23 Jul 2024 15:14:23 -0600
Subject: [PATCH 06/72] Add node-metrics crate

The `node-metrics` crate is designed to provide information about the
blockchain and the nodes connected to the blockchain network. The name of
the crate was suggested by Charles so that it might be used by other
systems.
However, in general it is intended to serve information for the Node
Validator Dashboard.

This crate defines various messages and logic for consuming and
distributing information about the Espresso blockchain and the nodes
connected to the Espresso Sequencer network.

The current implementation is a basic one, aimed at getting the simplest
setup working. At the moment, there are two issues that prevent anything
from building.

- The first is that the `Sink` implementation of `Connection` from
  `tide-disco` requires the message being sent to be a borrowed value
  instead of the value itself. I feel that this is unexpected, and it
  makes it difficult to actually send messages as a passthrough.
- The second is that the `sequencer` crate is being consumed in an effort
  to extract the `SeqTypes` type. By doing this, the idea is that we
  should be able to use the actual types defined by the sequencer so that
  we are able to consume the exact information we will be given in a
  production environment. However, due to this import the tests that have
  been written cannot be evaluated. I believe that this means that this
  cannot work as expected, and will need to be rethought.
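To make the first issue concrete: because the socket's `Sink` takes each outgoing message by reference, the handler cannot simply move owned values through a stream combinator; it has to hold every message and re-send it by reference. A hedged sketch of the shape this forces (mirroring the forwarding loop in `v0/mod.rs` below; assumes `sink` is the server-to-client half of the split `Connection` and `server_message_receiver` is the internal channel):

    // Re-send each owned message by reference, since the sink wants &ServerMessage.
    while let Ok(message) = server_message_receiver.recv() {
        if sink.send(&message).await.is_err() {
            // The websocket has closed; stop forwarding.
            break;
        }
    }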
"serde", + "tide-disco", + "time 0.3.36", + "toml", + "vbs", +] + [[package]] name = "nom" version = "7.1.3" diff --git a/Cargo.toml b/Cargo.toml index 9d79827c4..fd34f450a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,7 @@ members = [ "contracts/rust/diff-test", "contracts/rust/gen-vk-contract", "hotshot-state-prover", + "node-metrics", "sequencer", "types", "utils", @@ -39,7 +40,9 @@ async-trait = "0.1" base64 = "0.22" base64-bytes = "0.1" bincode = "1.3.3" +bitvec = "1.0.1" blake3 = "1.5" +circular-buffer = "0.1.7" clap = { version = "4.4", features = ["derive", "env", "string"] } cld = "0.5" derive_more = "0.99.17" diff --git a/node-metrics/Cargo.toml b/node-metrics/Cargo.toml new file mode 100644 index 000000000..ae5f659fb --- /dev/null +++ b/node-metrics/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "node-metrics" +description = "A Library for collecting, recording and distributing information about the Espresso Block Chain Network" +version = { workspace = true } +authors = { workspace = true } +edition = { workspace = true } + +[features] +default = ["libp2p"] +testing = ["hotshot-testing"] +libp2p = [] + +[dependencies] +async-compatibility-layer = { workspace = true } +async-std = { workspace = true } +bitvec = { workspace = true } +circular-buffer = { workspace = true } +futures = { workspace = true } +hotshot-query-service = { workspace = true } +hotshot-types = { workspace = true } +hotshot-stake-table = { workspace = true } +sequencer = { path = "../sequencer" } +serde = { workspace = true } +tide-disco = { workspace = true } +time = { workspace = true } +toml = { workspace = true } +vbs = { workspace = true } + +# Dependencies for feature `testing` +hotshot-testing = { workspace = true, optional = true } diff --git a/node-metrics/src/api/mod.rs b/node-metrics/src/api/mod.rs new file mode 100644 index 000000000..539d436a9 --- /dev/null +++ b/node-metrics/src/api/mod.rs @@ -0,0 +1 @@ +pub mod node_validator; diff --git a/node-metrics/src/api/node_validator/mod.rs b/node-metrics/src/api/node_validator/mod.rs new file mode 100644 index 000000000..2d24cd45f --- /dev/null +++ b/node-metrics/src/api/node_validator/mod.rs @@ -0,0 +1 @@ +pub mod v0; diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs new file mode 100644 index 000000000..64ea78d0f --- /dev/null +++ b/node-metrics/src/api/node_validator/v0/mod.rs @@ -0,0 +1,240 @@ +use crate::service::client_message::{ClientMessage, InternalClientMessage}; +use crate::service::server_message::ServerMessage; +use futures::SinkExt; +use futures::{future::FutureExt, stream::StreamExt}; +use serde::{Deserialize, Serialize}; +use std::fmt; +use std::sync::mpsc::{self, Sender}; +use tide_disco::socket::Connection; +use tide_disco::{api::ApiError, Api}; +use vbs::version::{StaticVersion, StaticVersionType, Version}; + +/// CONSTANT for protocol major version +pub const VERSION_MAJ: u16 = 0; + +/// CONSTANT for protocol minor version +pub const VERSION_MIN: u16 = 1; + +pub const VERSION_0_1: Version = Version { + major: VERSION_MAJ, + minor: VERSION_MIN, +}; + +/// Constant for the version of this API. 
+pub const BASE_VERSION: Version = VERSION_0_1;
+
+/// Specific type for version 0.1
+pub type Version01 = StaticVersion<VERSION_MAJ, VERSION_MIN>;
+// Static instance of the Version01 type
+pub const STATIC_VER_0_1: Version01 = StaticVersion {};
+
+#[derive(Debug, Serialize, Deserialize)]
+pub enum Error {}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "Error")
+    }
+}
+
+impl std::error::Error for Error {}
+
+impl tide_disco::Error for Error {
+    fn catch_all(_status: tide_disco::StatusCode, _msg: String) -> Self {
+        todo!()
+    }
+
+    fn status(&self) -> tide_disco::StatusCode {
+        todo!()
+    }
+}
+
+#[derive(Debug)]
+pub enum LoadApiError {
+    Toml(toml::de::Error),
+    Api(ApiError),
+}
+
+impl From<toml::de::Error> for LoadApiError {
+    fn from(err: toml::de::Error) -> Self {
+        LoadApiError::Toml(err)
+    }
+}
+
+impl From<ApiError> for LoadApiError {
+    fn from(err: ApiError) -> Self {
+        LoadApiError::Api(err)
+    }
+}
+
+pub(crate) fn load_api<State: 'static, Error: 'static, Ver: StaticVersionType + 'static>(
+    default: &str,
+) -> Result<Api<State, Error, Ver>, LoadApiError> {
+    let toml: toml::Value = toml::from_str(default)?;
+    Ok(Api::new(toml)?)
+}
+
+#[derive(Debug)]
+pub enum LoadTomlError {
+    Io(std::io::Error),
+    Toml(toml::de::Error),
+    Utf8(std::str::Utf8Error),
+}
+
+impl From<std::io::Error> for LoadTomlError {
+    fn from(err: std::io::Error) -> Self {
+        LoadTomlError::Io(err)
+    }
+}
+
+impl From<toml::de::Error> for LoadTomlError {
+    fn from(err: toml::de::Error) -> Self {
+        LoadTomlError::Toml(err)
+    }
+}
+
+impl From<std::str::Utf8Error> for LoadTomlError {
+    fn from(err: std::str::Utf8Error) -> Self {
+        LoadTomlError::Utf8(err)
+    }
+}
+
+#[derive(Debug)]
+pub enum DefineApiError {
+    LoadApiError(LoadApiError),
+    LoadTomlError(LoadTomlError),
+    ApiError(ApiError),
+}
+
+impl From<LoadApiError> for DefineApiError {
+    fn from(err: LoadApiError) -> Self {
+        DefineApiError::LoadApiError(err)
+    }
+}
+
+impl From<LoadTomlError> for DefineApiError {
+    fn from(err: LoadTomlError) -> Self {
+        DefineApiError::LoadTomlError(err)
+    }
+}
+
+impl From<ApiError> for DefineApiError {
+    fn from(err: ApiError) -> Self {
+        DefineApiError::ApiError(err)
+    }
+}
+
+/// [StateClientMessageSender] allows for the retrieval of a [Sender] for sending
+/// messages received from the client to the Server for request processing.
+pub trait StateClientMessageSender {
+    fn sender(&self) -> Sender<InternalClientMessage>;
+}
+
+#[derive(Debug)]
+pub enum EndpointError {}
+
+pub fn define_api<State>() -> Result<Api<State, Error, Version01>, DefineApiError>
+where
+    State: StateClientMessageSender + Send + Sync + 'static,
+{
+    let mut api = load_api::<State, Error, Version01>(include_str!("./node_validator.toml"))?;
+
+    api.with_version("0.0.1".parse().unwrap()).socket(
+        "details",
+        move |_req, socket: Connection<ServerMessage, ClientMessage, Error, Version01>, state| {
+            async move {
+                let client_message_sender = state.sender();
+                let (server_message_sender, server_message_receiver) = mpsc::channel();
+
+                // Let's register ourselves with the Server
+                if let Err(_) = client_message_sender
+                    .send(InternalClientMessage::Connected(server_message_sender))
+                {
+                    todo!();
+                }
+
+                // We should receive a response from the server that identifies us
+                // uniquely.
+                let client_id =
+                    if let Ok(ServerMessage::YouAre(client_id)) = server_message_receiver.recv() {
+                        client_id
+                    } else {
+                        todo!();
+                    };
+
+                let (sink, mut stream) = socket.split();
+
+                // Now we want to just auto-forward any server message to the client
+                // in its own thread, and we also want to auto-forward any client
+                // message to the server.
+ let client_id = client_id.clone(); + let client_message_sender = client_message_sender.clone(); + let handle = async_std::task::spawn(async move { + let mut sink = sink; + while let Ok(message) = server_message_receiver.recv() { + if let Err(_) = sink.send(&message).await { + // we're closed at this point + break; + } + } + }); + + // Start forwarding message from the client + while let Some(Ok(request)) = stream.next().await { + let internal_client_message = + request.to_internal_with_client_id(client_id.clone()); + if let Err(_) = client_message_sender.send(internal_client_message) { + todo!(); + } + } + + // wait for the spawned task to finish + handle.await; + + Ok(()) + } + .boxed() + }, + )?; + Ok(api) +} + +#[cfg(test)] +mod tests { + use super::StateClientMessageSender; + use crate::service::client_message::InternalClientMessage; + use std::sync::mpsc::{self, Receiver, Sender}; + + struct TestState( + pub(crate) Sender, + pub(crate) Receiver, + ); + + impl TestState { + fn new() -> Self { + let (sender, receiver) = mpsc::channel(); + TestState(sender, receiver) + } + } + + impl StateClientMessageSender for TestState { + fn sender(&self) -> Sender { + self.0.clone() + } + } + + unsafe impl Send for TestState {} + unsafe impl Sync for TestState {} + + // Woo hoo + #[test] + fn test_api_creation() { + let api = super::define_api::(); + match api { + Ok(_) => {} + Err(e) => { + panic!("Error: {:?}", e); + } + } + } +} diff --git a/node-metrics/src/api/node_validator/v0/node_validator.toml b/node-metrics/src/api/node_validator/v0/node_validator.toml new file mode 100644 index 000000000..eaa99f4ea --- /dev/null +++ b/node-metrics/src/api/node_validator/v0/node_validator.toml @@ -0,0 +1,40 @@ +# Copyright (c) 2022 Espresso Systems (espressosys.com) +# This file is part of the HotShot Query Service library. +# +# This program is free software: you can redistribute it and/or modify it under the terms of the GNU +# General Public License as published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# You should have received a copy of the GNU General Public License along with this program. If not, +# see . + +[meta] +FORMAT_VERSION = "0.1.0" +NAME = "node-validator" +DESCRIPTION = """ +HotShot chain state + +The availability API provides an objective view of the HotShot blockchain. It provides access only +to normative data: that is, data which is agreed upon by all honest consensus nodes and which is +immutable. This means access to core consensus data structures including leaves, blocks, and +headers, where each query is pure and idempotent. This also means that it is possible for a client +to verify all of the information provided by this API, by running a HotShot light client and +downloading the appropriate evidence with each query. + +This API does not provide any queries which represent only the _current_ state of the chain or may +change over time, and it does not provide information for which there is not (yet) agreement of a +supermajority of consensus nodes. For information about the current dynamic state of consensus and +uncommitted state, try the `status` API. 
For information about the chain which is tabulated by this specific node
and not subject to full consensus agreement, try the `node` API.
"""

[route.details]
PATH = ["details"]
METHOD = "SOCKET"
DOC = """
Open a WebSocket connection to the node validator.

The client sends `ClientMessage` requests over the socket (subscriptions and
snapshot requests) and receives a stream of `ServerMessage` responses in
return.
"""
diff --git a/node-metrics/src/lib.rs b/node-metrics/src/lib.rs
new file mode 100644
index 000000000..0e44f43d8
--- /dev/null
+++ b/node-metrics/src/lib.rs
@@ -0,0 +1,126 @@
+// Copyright (c) 2022 Espresso Systems (espressosys.com)
+// This file is part of the HotShot Query Service library.
+//
+// This program is free software: you can redistribute it and/or modify it under the terms of the GNU
+// General Public License as published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+// This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
+// even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+// You should have received a copy of the GNU General Public License along with this program. If not,
+// see <https://www.gnu.org/licenses/>.
+
+//! # Node Validator Service
+//!
+//! The Node Validator Service is a general purpose relay service that watches
+//! data flow from the HotShot protocol via the CDN pub sub service. It
+//! maintains a local state of the network map and is able to relay the
+//! stored details to any client that requests it. In addition, it is able
+//! to provide individual state change updates to any client that
+//! subscribes to that particular event stream. In order to be able to
+//! provide identity information to the clients, this identity information
+//! must be volunteered by the nodes in the network. This requires the
+//! nodes to be able to receive and respond to these requests, and to relay
+//! the identity information of the node to anyone who requests it.
+//!
+//! ## Storage
+//!
+//! For this service to be effective and efficient, it needs to store the
+//! state of the network in a fast, efficient manner. We are not expecting
+//! a lot of data to be stored within this storage, but as things tend to
+//! grow and change it may be necessary to have more robust storage
+//! mechanisms in place, or even to have the ability to introduce new
+//! storage mechanisms. In order to effectively store the data that we need
+//! to store, we need to ask a fundamental question:
+//!
+//! What states do we need to track?
+//! 1. Node Information
+//!    a. Node Identity Information
+//!    b. Node State Information (specifically voter participation, latest block
+//!       information, and staking information)
+//! 2. Network Information
+//!    a. Latest Block
+//!    b. The most recent N blocks (N assumed to be 50 at the moment)
+//!       - Information can be derived from these most recent 50 blocks
+//!         that allows us to derive histogram data, producer data, and
+//!         the most recent block information. We might be able to get away with
+//!         just storing the header information of these blocks, since we don't
+//!         need the full block data.
+//!    c. The most recent N vote participants
+//!    d. The top block producers over the latest N blocks
+//!    e. Histogram data for the latest N blocks
+//!       - Block Size
+//!       - Block Time
+//!       - Block Space Used
+//!
+//! ## Data Streams
+//!
+//! In order for clients to be able to receive the information from the node
+//! validator service, we need to be able to facilitate requests. We could
+//! simply start streaming data to the clients as soon as they connect;
+//! however, this causes potential compatibility issues with the clients
+//! in question. For example, if we want to add a new data stream that
+//! can be retrieved for the client, and the client isn't expecting it, they
+//! won't know how to handle the data, and it can potentially cause errors.
+//! As such, it makes sense to only provide data streams when the client asks
+//! for them. This allows for new features to be added to the data stream
+//! without breaking compatibility with the clients, provided that the existing
+//! streams don't change in a way that would break the client.
+//!
+//! Starting out, there doesn't need to be a lot of data streamed to the
+//! client. In fact, we might be able to be a little naive about this, and
+//! broadcast general objects in an event stream, as data may be derivable
+//! from the objects that are broadcast. For example, if we start out by
+//! sending the latest N block information, the client may be able to derive
+//! histogram data from that information, which would prevent us from having
+//! to send and store the histogram data. However, there may be some pieces
+//! of data that are lacking from this approach which would require us to
+//! send out additional data streams.
+//!
+//! Ideally, we should strive for a balance between the data we store locally
+//! and the data that we stream to the clients. In order to know what we
+//! need to store, we need to know what data we are expecting the client to
+//! consume, and which data can be derived for these purposes.
+//!
+//! What Data Streams do we need to provide to clients?
+//! 1. Node Information
+//!    a. Node Identity Information
+//!       - Should be able to be sent in an initial batch
+//!       - Should be able to send individual updates as they occur
+//!    b. Node State Information
+//!       - Should be able to be sent in an initial batch
+//!       - Should be able to send individual updates as they occur
+//!    c. Block Information
+//!       - Should be able to be sent in an initial batch
+//!       - Should be able to send individual updates as they occur
+
+pub mod api;
+pub mod service;
+
+#[cfg(test)]
+pub mod test;
+
+/// Storage is a general purpose trait that allows for the storage of
+/// arbitrary data. This trait allows for the specification of the
+/// Get result to be different than that of the Set result. This should
+/// allow for a larger degree of flexibility when it comes to storing things.
+pub trait Storage {
+    type Get;
+    type Set;
+    fn get(&self) -> Self::Get;
+    fn set(&mut self, value: Self::Set);
+}
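To make the `Get`/`Set` split concrete, here is a minimal sketch of an implementation — the `LatestValue` type is hypothetical and not part of this patch; it shows `get` returning a cheap `Arc` clone while `set` takes ownership of a new value:

    use std::sync::Arc;

    /// Hypothetical store holding a single latest value.
    struct LatestValue<T> {
        inner: Arc<T>,
    }

    impl<T> Storage for LatestValue<T> {
        type Get = Arc<T>;
        type Set = T;

        fn get(&self) -> Self::Get {
            // Hand back a shared handle instead of cloning the whole value
            Arc::clone(&self.inner)
        }

        fn set(&mut self, value: Self::Set) {
            // Replace the stored value wholesale
            self.inner = Arc::new(value);
        }
    }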
+pub trait KeyValueStorage {
+    type Key: Eq;
+    type Value: Clone;
+    fn get(&self, key: &Self::Key) -> &Self::Value;
+    fn set(&mut self, key: &Self::Key, value: Self::Value);
+}
+
+pub struct NodeInformation {}
diff --git a/node-metrics/src/main.rs b/node-metrics/src/main.rs
new file mode 100644
index 000000000..26549a333
--- /dev/null
+++ b/node-metrics/src/main.rs
@@ -0,0 +1,65 @@
+use std::sync::{Arc, RwLock};
+
+use node_metrics::api::node_validator;
+use tide_disco::App;
+use vbs::version::{StaticVersion, Version};
+
+/// CONSTANT for protocol major version
+pub const VERSION_MAJ: u16 = 0;
+
+/// CONSTANT for protocol minor version
+pub const VERSION_MIN: u16 = 1;
+
+pub const VERSION_0_1: Version = Version {
+    major: VERSION_MAJ,
+    minor: VERSION_MIN,
+};
+
+/// Constant for the base protocol version in this instance of HotShot.
+pub const BASE_VERSION: Version = VERSION_0_1;
+
+/// Type for protocol static version 0.1.
+pub type Version01 = StaticVersion<VERSION_MAJ, VERSION_MIN>;
+
+/// This represents the latest version of this service. This will likely
+/// always be whatever the maximum API version being served is.
+pub const SERVICE_VER_0_1: Version01 = StaticVersion {};
+
+/// The client definition for the Push CDN. Uses the Quic
+/// protocol and no middleware. Differs from the user
+/// definition in that it is on the client-side.
+#[derive(Clone)]
+pub struct ClientDef;
+
+struct State {}
+
+/// ClientConnectionMessage is a message that indicates when a client is
+/// connecting or disconnecting from the service. This message is used
+/// to signify when the client arrives or leaves.
+pub enum ClientConnectionMessage {
+    Connected,
+    Disconnected,
+}
+
+#[async_std::main]
+async fn main() {
+    // We have two separate states we want to maintain as much as possible.
+    // The first is the Data State, which contains all of the recorded state
+    // we want to keep track of and to be able to relay at a moment's notice.
+    // The second is a state of the connected clients. This state should be
+    // able to be read from and written to indirectly by the clients.
+
+    let state = Arc::new(RwLock::new(State {}));
+
+    let mut app = App::<_, node_validator::v0::Error>::with_state(state);
+    let node_validator_api_v0 = node_validator::v0::define_api().expect("api to be defined");
+    app.register_module("node-validator", node_validator_api_v0)
+        .expect("register module");
+
+    // Serve the app
+
+    let url = format!("0.0.0.0:9000");
+    app.serve(&url, SERVICE_VER_0_1)
+        .await
+        .expect("app to be served");
+}
diff --git a/node-metrics/src/service/client_id/mod.rs b/node-metrics/src/service/client_id/mod.rs
new file mode 100644
index 000000000..ac34be7f8
--- /dev/null
+++ b/node-metrics/src/service/client_id/mod.rs
@@ -0,0 +1,12 @@
+use serde::{Deserialize, Serialize};
+
+/// [ClientId] represents the unique identifier for a client that is connected
+/// to the server.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
+pub struct ClientId(u128);
+
+impl ClientId {
+    pub fn from_count(count: u128) -> Self {
+        ClientId(count)
+    }
+}
diff --git a/node-metrics/src/service/client_message/mod.rs b/node-metrics/src/service/client_message/mod.rs
new file mode 100644
index 000000000..353854b5e
--- /dev/null
+++ b/node-metrics/src/service/client_message/mod.rs
@@ -0,0 +1,58 @@
+use serde::{Deserialize, Serialize};
+
+use super::client_id::ClientId;
+use super::server_message::ServerMessage;
+use std::sync::mpsc::Sender;
+
+/// InternalClientMessage represents the message requests that the client can
+/// send to the server. These messages are requests that the client can send
+/// in order for the server to send back responses that correspond to the
+/// request.
+pub enum InternalClientMessage {
+    Connected(Sender<ServerMessage>),
+    Disconnected(ClientId),
+
+    SubscribeLatestBlock(ClientId),
+    SubscribeNodeIdentity(ClientId),
+
+    RequestBlocksSnapshot(ClientId),
+    RequestNodeIdentitySnapshot(ClientId),
+    RequestHistogramSnapshot(ClientId),
+}
+
+/// [ClientMessage] represents the messages that the client can send to the
+/// server for a request.
+///
+#[derive(Debug, PartialEq, Serialize, Deserialize)]
+pub enum ClientMessage {
+    SubscribeLatestBlock,
+    SubscribeNodeIdentity,
+
+    RequestBlocksSnapshot,
+    RequestNodeIdentitySnapshot,
+    RequestHistogramSnapshot,
+}
+
+impl ClientMessage {
+    /// [to_internal_with_client_id] converts the [ClientMessage] into an
+    /// [InternalClientMessage] with the given [ClientId].
+    pub fn to_internal_with_client_id(self, client_id: ClientId) -> InternalClientMessage {
+        match self {
+            ClientMessage::SubscribeLatestBlock => {
+                InternalClientMessage::SubscribeLatestBlock(client_id)
+            }
+            ClientMessage::SubscribeNodeIdentity => {
+                InternalClientMessage::SubscribeNodeIdentity(client_id)
+            }
+            ClientMessage::RequestBlocksSnapshot => {
+                InternalClientMessage::RequestBlocksSnapshot(client_id)
+            }
+            ClientMessage::RequestNodeIdentitySnapshot => {
+                InternalClientMessage::RequestNodeIdentitySnapshot(client_id)
+            }
+            ClientMessage::RequestHistogramSnapshot => {
+                InternalClientMessage::RequestHistogramSnapshot(client_id)
+            }
+        }
+    }
+}
diff --git a/node-metrics/src/service/client_state/mod.rs b/node-metrics/src/service/client_state/mod.rs
new file mode 100644
index 000000000..8b195184e
--- /dev/null
+++ b/node-metrics/src/service/client_state/mod.rs
@@ -0,0 +1,591 @@
+use super::{
+    client_id::ClientId, client_message::InternalClientMessage, data_state::DataState,
+    server_message::ServerMessage,
+};
+use futures::{Stream, StreamExt};
+use hotshot_query_service::explorer::{BlockDetail, ExplorerHistograms};
+use sequencer::SeqTypes;
+use std::sync::RwLockWriteGuard;
+use std::{
+    collections::{HashMap, HashSet},
+    sync::{
+        mpsc::{Receiver, SendError, Sender},
+        Arc, RwLock,
+    },
+};
+
+/// ClientState represents the service state of the connected clients.
+/// It maintains and represents the connected clients, and their subscriptions.
+/// This state is meant to be managed in a separate thread that assists with
+/// processing and updating of individual client states.
+pub struct ClientState {
+    client_id: ClientId,
+    sender: Sender<ServerMessage>,
+}
+
+impl ClientState {
+    /// Create a new ClientState with the given client_id and sender.
+    pub fn new(client_id: ClientId, sender: Sender<ServerMessage>) -> Self {
+        Self { client_id, sender }
+    }
+
+    /// Send a message to the client's consuming thread.
+    pub fn send_message(&self, message: ServerMessage) -> Result<(), SendError<ServerMessage>> {
+        self.sender.send(message)
+    }
+}
+
+/// [ClientThreadState] represents the state of all of the active client
+/// connections connected to the service. This state governs which clients
+/// are connected, and what subscriptions they have setup.
+pub struct ClientThreadState {
+    clients: HashMap<ClientId, ClientState>,
+    subscribed_latest_block: HashSet<ClientId>,
+    subscribed_node_identity: HashSet<ClientId>,
+    connection_id_counter: u128,
+}
+
+/// [drop_client_client_thread_state_write_guard] is a utility function for
+/// cleaning up the [ClientThreadState].
+fn drop_client_client_thread_state_write_guard(
+    client_id: &ClientId,
+    client_thread_state_write_guard: &mut RwLockWriteGuard<ClientThreadState>,
+) -> Option<ClientState> {
+    let client = client_thread_state_write_guard.clients.remove(client_id);
+    client_thread_state_write_guard
+        .subscribed_latest_block
+        .remove(client_id);
+    client_thread_state_write_guard
+        .subscribed_node_identity
+        .remove(client_id);
+
+    client
+}
+
+/// [drop_client_no_lock_guard] is a utility function for cleaning up the [ClientThreadState]
+/// when a client is detected as disconnected.
+fn drop_client_no_lock_guard(
+    client_id: &ClientId,
+    client_thread_state: Arc<RwLock<ClientThreadState>>,
+) -> Option<ClientState> {
+    let mut client_thread_state_write_lock_guard = match client_thread_state.write() {
+        Ok(lock) => lock,
+        Err(_) => return None,
+    };
+
+    drop_client_client_thread_state_write_guard(
+        client_id,
+        &mut client_thread_state_write_lock_guard,
+    )
+}
+
+/// [handle_client_message_connected] is a function that processes the client
+/// message to connect a client to the service.
+pub fn handle_client_message_connected(
+    sender: Sender<ServerMessage>,
+    client_thread_state: Arc<RwLock<ClientThreadState>>,
+) -> Result<ClientId, ()> {
+    let mut client_thread_state_write_lock_guard = match client_thread_state.write() {
+        Ok(lock) => lock,
+        Err(_) => return Err(()),
+    };
+
+    client_thread_state_write_lock_guard.connection_id_counter += 1;
+    let client_id =
+        ClientId::from_count(client_thread_state_write_lock_guard.connection_id_counter);
+
+    client_thread_state_write_lock_guard.clients.insert(
+        client_id.clone(),
+        ClientState {
+            client_id: client_id.clone(),
+            sender: sender.clone(),
+        },
+    );
+
+    // Explicitly unlock
+    drop(client_thread_state_write_lock_guard);
+
+    // Send the client their new id.
+    if let Err(_) = sender.send(ServerMessage::YouAre(client_id.clone())) {
+        // We need to drop the client now.
+        drop_client_no_lock_guard(&client_id, client_thread_state.clone());
+        return Err(());
+    }
+
+    Ok(client_id)
+}
+
+/// [handle_client_message_disconnected] is a function that processes the client
+/// message to disconnect a client from the service.
+pub fn handle_client_message_disconnected(
+    client_id: ClientId,
+    client_thread_state: Arc<RwLock<ClientThreadState>>,
+) -> Result<(), ()> {
+    // We might receive an implicit disconnect when attempting to
+    // send a message, as the receiving channel might be closed.
+    drop_client_no_lock_guard(&client_id, client_thread_state.clone());
+    Ok(())
+}
+
+/// [handle_client_message_subscribe_latest_block] is a function that processes
+/// the client message to subscribe to the latest block stream.
+pub fn handle_client_message_subscribe_latest_block(
+    client_id: ClientId,
+    client_thread_state: Arc<RwLock<ClientThreadState>>,
+) -> Result<(), ()> {
+    let mut client_thread_state_write_lock_guard = match client_thread_state.write() {
+        Ok(lock) => lock,
+        Err(_) => return Err(()),
+    };
+
+    client_thread_state_write_lock_guard
+        .subscribed_latest_block
+        .insert(client_id);
+
+    // Explicitly unlock
+    drop(client_thread_state_write_lock_guard);
+    Ok(())
+}
+
+/// [handle_client_message_subscribe_node_identity] is a function that processes
+/// the client message to subscribe to the node identity stream.
+pub fn handle_client_message_subscribe_node_identity(
+    client_id: ClientId,
+    client_thread_state: Arc<RwLock<ClientThreadState>>,
+) -> Result<(), ()> {
+    let mut client_thread_state_write_lock_guard = match client_thread_state.write() {
+        Ok(lock) => lock,
+        Err(_) => return Err(()),
+    };
+
+    client_thread_state_write_lock_guard
+        .subscribed_node_identity
+        .insert(client_id);
+
+    // Explicitly unlock
+    drop(client_thread_state_write_lock_guard);
+    Ok(())
+}
+
+/// [handle_client_message_request_blocks_snapshot] is a function that processes
+/// the client message request for a blocks snapshot.
+pub fn handle_client_message_request_blocks_snapshot(
+    client_id: ClientId,
+    data_state: Arc<RwLock<DataState>>,
+    client_thread_state: Arc<RwLock<ClientThreadState>>,
+) -> Result<(), ()> {
+    let client_thread_state_read_lock_guard = match client_thread_state.read() {
+        Ok(lock) => lock,
+        Err(_) => return Err(()),
+    };
+
+    let data_state_read_lock_guard = match data_state.read() {
+        Ok(lock) => lock,
+        Err(_) => {
+            drop(client_thread_state_read_lock_guard);
+            return Err(());
+        }
+    };
+
+    let latest_blocks = data_state_read_lock_guard
+        .latest_blocks
+        .iter()
+        .map(|block| BlockDetail {
+            hash: block.hash,
+            proposer_id: block.proposer_id,
+            height: block.height,
+            size: block.size,
+            time: block.time,
+            num_transactions: block.num_transactions,
+            fee_recipient: block.fee_recipient,
+            block_reward: block.block_reward.clone(),
+        })
+        .collect::<Vec<BlockDetail<SeqTypes>>>();
+
+    // Record whether the send fails, but defer dropping the failed client
+    // until both read guards have been released: dropping a client acquires
+    // the write lock, which must not happen while a read guard is still held.
+    let send_failed = client_thread_state_read_lock_guard
+        .clients
+        .get(&client_id)
+        .map_or(false, |client| {
+            client
+                .send_message(ServerMessage::BlocksSnapshot(latest_blocks))
+                .is_err()
+        });
+
+    drop(data_state_read_lock_guard);
+    drop(client_thread_state_read_lock_guard);
+
+    if send_failed {
+        drop_client_no_lock_guard(&client_id, client_thread_state.clone());
+    }
+
+    Ok(())
+}
+
+/// [handle_client_message_request_node_identity_snapshot] is a function that
+/// processes the client message request for a node identity snapshot.
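+///
+/// Note: like the other snapshot handlers here, this acquires the client
+/// thread state read lock first and the data state read lock second;
+/// keeping that acquisition order consistent across handlers helps avoid
+/// lock-order inversions between the two [RwLock]s.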
+pub fn handle_client_message_request_node_identity_snapshot(
+    client_id: ClientId,
+    data_state: Arc<RwLock<DataState>>,
+    client_thread_state: Arc<RwLock<ClientThreadState>>,
+) -> Result<(), ()> {
+    // Let's send the current Node Identity Snapshot to the client
+    let client_thread_state_read_lock_guard = match client_thread_state.read() {
+        Ok(lock) => lock,
+        Err(_) => return Err(()),
+    };
+
+    let data_state_read_lock_guard = match data_state.read() {
+        Ok(lock) => lock,
+        Err(_) => {
+            drop(client_thread_state_read_lock_guard);
+            return Err(());
+        }
+    };
+
+    let client_result = client_thread_state_read_lock_guard.clients.get(&client_id);
+    drop(data_state_read_lock_guard);
+    if let Some(client) = client_result {
+        if let Err(_) = client.send_message(ServerMessage::NodeIdentitySnapshot) {
+            // The send failed, so the client has presumably disconnected.
+            // Release the read guard before dropping the client, since the
+            // drop needs the write lock.
+            drop(client_thread_state_read_lock_guard);
+            drop_client_no_lock_guard(&client_id, client_thread_state.clone());
+            return Ok(());
+        }
+
+        // The send succeeded, so the client stays registered.
+        return Ok(());
+    }
+
+    drop(client_thread_state_read_lock_guard);
+    return Ok(());
+}
+
+/// [handle_client_message_request_histogram_snapshot] is a function that
+/// processes the client message request for a histogram snapshot.
+pub fn handle_client_message_request_histogram_snapshot(
+    client_id: ClientId,
+    data_state: Arc<RwLock<DataState>>,
+    client_thread_state: Arc<RwLock<ClientThreadState>>,
+) -> Result<(), ()> {
+    // Let's send the current histogram data snapshot to the client
+    let client_thread_state_read_lock_guard = match client_thread_state.read() {
+        Ok(lock) => lock,
+        Err(_) => return Err(()),
+    };
+
+    let data_state_read_lock_guard = match data_state.read() {
+        Ok(lock) => lock,
+        Err(_) => {
+            drop(client_thread_state_read_lock_guard);
+            return Err(());
+        }
+    };
+
+    let histogram_data = ExplorerHistograms {
+        block_size: data_state_read_lock_guard
+            .latest_blocks
+            .iter()
+            .skip(1)
+            .map(|block| block.size)
+            .collect(),
+        block_time: data_state_read_lock_guard
+            .latest_blocks
+            .iter()
+            .skip(1)
+            .zip(data_state_read_lock_guard.latest_blocks.iter())
+            .map(|(block_i, block_i_sub_1)| {
+                (block_i.time.0 - block_i_sub_1.time.0).whole_seconds() as u64
+            })
+            .collect(),
+        block_transactions: data_state_read_lock_guard
+            .latest_blocks
+            .iter()
+            .skip(1)
+            .map(|block| block.num_transactions)
+            .collect(),
+        block_heights: data_state_read_lock_guard
+            .latest_blocks
+            .iter()
+            .skip(1)
+            .map(|block| block.height)
+            .collect(),
+    };
+    drop(data_state_read_lock_guard);
+
+    if let Some(client) = client_thread_state_read_lock_guard.clients.get(&client_id) {
+        if let Err(_) = client.send_message(ServerMessage::HistogramSnapshot(histogram_data)) {
+            // As above: release the read guard before dropping the client.
+            drop(client_thread_state_read_lock_guard);
+            drop_client_no_lock_guard(&client_id, client_thread_state.clone());
+            return Ok(());
+        }
+
+        // The send succeeded, so the client stays registered.
+        return Ok(());
+    }
+
+    drop(client_thread_state_read_lock_guard);
+    Ok(())
+}
+
+/// [process_client_message] is a function that receives an incoming client
+/// message and dispatches it to the appropriate handler.
+///
+/// The [DataState] is provided and is used only as a Read lock to distribute
+/// the current state of the system to the clients upon request.
+///
+/// The [ClientThreadState] is provided as it needs to be updated with new
+/// subscriptions / new connections depending on the incoming
+/// [InternalClientMessage].
+pub fn process_client_message(
+    message: InternalClientMessage,
+    data_state: Arc<RwLock<DataState>>,
+    client_thread_state: Arc<RwLock<ClientThreadState>>,
+) -> Result<(), ()> {
+    match message {
+        InternalClientMessage::Connected(sender) => {
+            handle_client_message_connected(sender, client_thread_state).map(|_| ())
+        }
+
+        InternalClientMessage::Disconnected(client_id) => {
+            handle_client_message_disconnected(client_id, client_thread_state)
+        }
+
+        InternalClientMessage::SubscribeLatestBlock(client_id) => {
+            handle_client_message_subscribe_latest_block(client_id, client_thread_state)
+        }
+
+        InternalClientMessage::SubscribeNodeIdentity(client_id) => {
+            handle_client_message_subscribe_node_identity(client_id, client_thread_state)
+        }
+
+        InternalClientMessage::RequestBlocksSnapshot(client_id) => {
+            handle_client_message_request_blocks_snapshot(
+                client_id,
+                data_state,
+                client_thread_state,
+            )
+        }
+
+        InternalClientMessage::RequestNodeIdentitySnapshot(client_id) => {
+            handle_client_message_request_node_identity_snapshot(
+                client_id,
+                data_state,
+                client_thread_state,
+            )
+        }
+
+        InternalClientMessage::RequestHistogramSnapshot(client_id) => {
+            handle_client_message_request_histogram_snapshot(
+                client_id,
+                data_state,
+                client_thread_state,
+            )
+        }
+    }
+}
+
+/// [clone_block_detail] is a utility function that clones a [BlockDetail]
+/// instance.
+fn clone_block_detail(input: &BlockDetail<SeqTypes>) -> BlockDetail<SeqTypes> {
+    BlockDetail {
+        hash: input.hash.clone(),
+        proposer_id: input.proposer_id.clone(),
+        height: input.height,
+        size: input.size,
+        time: input.time,
+        num_transactions: input.num_transactions,
+        fee_recipient: input.fee_recipient.clone(),
+        block_reward: input.block_reward.clone(),
+    }
+}
+
+/// [handle_received_block_detail] is a function that processes received Block
+/// details and will attempt to distribute the message to all of the clients
+/// that are subscribed to the latest block stream.
+fn handle_received_block_detail(
+    client_thread_state: Arc<RwLock<ClientThreadState>>,
+    block_detail: BlockDetail<SeqTypes>,
+) -> Result<(), ()> {
+    let client_thread_state_read_lock_guard = match client_thread_state.read() {
+        Ok(lock) => lock,
+        Err(_) => return Err(()),
+    };
+
+    // These are the clients who are subscribed to the latest blocks, that
+    // have an active ClientState within the system.
+    let latest_block_subscribers = client_thread_state_read_lock_guard
+        .subscribed_latest_block
+        .iter()
+        .map(|client_id| {
+            (
+                client_id,
+                (&client_thread_state_read_lock_guard)
+                    .clients
+                    .get(client_id),
+            )
+        })
+        .filter(|(_, client)| client.is_some());
+
+    // We collect the results of sending the latest block to the clients.
+    let client_send_results = latest_block_subscribers.map(|(client_id, client)| {
+        // This is guaranteed to be Some now
+        let client = client.unwrap();
+        let send_result = client.send_message(ServerMessage::LatestBlock(clone_block_detail(
+            &block_detail,
+        )));
+        (client_id, send_result)
+    });
+
+    // These are the clients we failed to send the message to. We copy these
+    // here so we can drop our read lock.
+    let failed_client_sends = client_send_results
+        .filter(|(_, send_result)| send_result.is_err())
+        .map(|(client_id, _)| client_id.clone())
+        .collect::<Vec<_>>();
+
+    // Explicitly drop the read lock.
+    drop(client_thread_state_read_lock_guard);
+
+    if failed_client_sends.is_empty() {
+        return Ok(());
+    }
+
+    // Let's acquire our write lock
+    let mut client_thread_state_write_lock_guard = match client_thread_state.write() {
+        Ok(lock) => lock,
+        Err(_) => return Err(()),
+    };
+
+    // We want to drop all of the failed clients.
+    // There's an optimization to be had here
+    for client_id in failed_client_sends {
+        drop_client_client_thread_state_write_guard(
+            &client_id,
+            &mut client_thread_state_write_lock_guard,
+        );
+    }
+
+    drop(client_thread_state_write_lock_guard);
+
+    Ok(())
+}
+
+/// [process_client_handling_thread] runs the client handling loop on a
+/// dedicated thread. It is responsible for managing the state of the
+/// connected clients, and their subscriptions.
+pub fn process_client_handling_thread(
+    receiver: Receiver<InternalClientMessage>,
+    data_state: Arc<RwLock<DataState>>,
+    client_thread_state: Arc<RwLock<ClientThreadState>>,
+) {
+    while let Ok(message) = receiver.recv() {
+        if let Err(_) =
+            process_client_message(message, data_state.clone(), client_thread_state.clone())
+        {
+            break;
+        }
+    }
+}
+
+/// [process_client_handling_stream] runs the client handling loop over a
+/// [Stream] of incoming messages. It is responsible for managing the state
+/// of the connected clients, and their subscriptions.
+pub async fn process_client_handling_stream<S>(
+    mut stream: S,
+    data_state: Arc<RwLock<DataState>>,
+    client_thread_state: Arc<RwLock<ClientThreadState>>,
+) where
+    S: Stream<Item = InternalClientMessage> + Unpin,
+{
+    while let Some(message) = stream.next().await {
+        if let Err(_) =
+            process_client_message(message, data_state.clone(), client_thread_state.clone())
+        {
+            break;
+        }
+    }
+}
+
+/// [process_distribute_client_handling_thread] consumes the [Receiver] of
+/// incoming [BlockDetail]s and distributes them to all subscribed clients.
+pub fn process_distribute_client_handling_thread(
+    client_thread_state: Arc<RwLock<ClientThreadState>>,
+    block_detail_receiver: Receiver<BlockDetail<SeqTypes>>,
+) {
+    while let Ok(block_detail) = block_detail_receiver.recv() {
+        if let Err(_) = handle_received_block_detail(client_thread_state.clone(), block_detail) {
+            break;
+        }
+    }
+}
+
+/// [process_distribute_client_handling_stream] consumes the [Stream] of
+/// incoming [BlockDetail]s and distributes them to all subscribed clients.
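+///
+/// A wiring sketch (assumes `client_thread_state` has already been
+/// constructed, and that block details arrive over a `futures` mpsc
+/// channel, whose receiver implements [Stream]):
+///
+/// ```ignore
+/// let (block_sender, block_receiver) = futures::channel::mpsc::channel(32);
+/// async_std::task::spawn(process_distribute_client_handling_stream(
+///     client_thread_state.clone(),
+///     block_receiver,
+/// ));
+/// ```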
+pub async fn process_distribute_client_handling_stream<S>(
+    client_thread_state: Arc<RwLock<ClientThreadState>>,
+    mut stream: S,
+) where
+    S: Stream<Item = BlockDetail<SeqTypes>> + Unpin,
+{
+    while let Some(block_detail) = stream.next().await {
+        if let Err(_) = handle_received_block_detail(client_thread_state.clone(), block_detail) {
+            break;
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{process_client_handling_thread, ClientThreadState};
+    use crate::service::{
+        client_id::ClientId, client_message::InternalClientMessage, data_state::DataState,
+        server_message::ServerMessage,
+    };
+    use circular_buffer::CircularBuffer;
+    use std::{
+        collections::{HashMap, HashSet},
+        sync::{mpsc, Arc, RwLock},
+        thread,
+    };
+
+    #[test]
+    fn test_process_client_handling_thread() {
+        let (server_message_sender, server_message_receiver) = mpsc::channel();
+        let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel();
+
+        let clients = HashMap::with_capacity(1);
+        let subscribed_node_identity = HashSet::with_capacity(1);
+        let subscribed_latest_block = HashSet::with_capacity(1);
+
+        let client_thread_state = ClientThreadState {
+            clients,
+            subscribed_latest_block,
+            subscribed_node_identity,
+            connection_id_counter: 1,
+        };
+
+        let client_thread_state = Arc::new(RwLock::new(client_thread_state));
+        let data_state = Arc::new(RwLock::new(DataState {
+            latest_voters: CircularBuffer::new(),
+            latest_blocks: CircularBuffer::new(),
+            stake_table: Default::default(),
+            node_identity: vec![],
+        }));
+
+        thread::spawn(move || {
+            process_client_handling_thread(
+                internal_client_message_receiver,
+                data_state.clone(),
+                client_thread_state.clone(),
+            )
+        });
+
+        // Send a Connected Message to the server
+        assert_eq!(
+            internal_client_message_sender
+                .send(InternalClientMessage::Connected(server_message_sender)),
+            Ok(())
+        );
+
+        assert_eq!(
+            server_message_receiver.recv(),
+            Ok(ServerMessage::YouAre(ClientId::from_count(2))),
+        );
+    }
+}
diff --git a/node-metrics/src/service/data_state/mod.rs b/node-metrics/src/service/data_state/mod.rs
new file mode 100644
index 000000000..8b7da10a0
--- /dev/null
+++ b/node-metrics/src/service/data_state/mod.rs
@@ -0,0 +1,270 @@
+use bitvec::vec::BitVec;
+use circular_buffer::CircularBuffer;
+use futures::{Stream, StreamExt};
+use hotshot_query_service::{
+    availability::QueryableHeader,
+    explorer::{BlockDetail, ExplorerHeader, Timestamp},
+    Leaf, Resolvable,
+};
+use hotshot_stake_table::vec_based::StakeTable;
+use hotshot_types::{
+    light_client::{CircuitField, StateVerKey},
+    signature_key::BLSPubKey,
+    traits::{
+        block_contents::BlockHeader,
+        stake_table::{SnapshotVersion, StakeTableScheme},
+        BlockPayload,
+    },
+};
+use sequencer::{Header, Payload, SeqTypes};
+use serde::{Deserialize, Serialize};
+use std::sync::mpsc::{Receiver, Sender};
+use std::{
+    collections::HashSet,
+    iter::zip,
+    net::IpAddr,
+    sync::{Arc, RwLock},
+};
+use time::OffsetDateTime;
+
+/// MAX_HISTORY represents the last N records that are stored within the
+/// DataState structure for the various different sample types.
+const MAX_HISTORY: usize = 50;
+
+/// [LocationDetails] represents the details of the location of the node.
+#[derive(Clone, PartialEq, Debug, Serialize, Deserialize)]
+pub struct LocationDetails {
+    coords: (f64, f64),
+    country: String,
+}
+
+impl LocationDetails {
+    pub fn new(coords: (f64, f64), country: String) -> Self {
+        Self { coords, country }
+    }
+}
+
+/// [NodeIdentity] represents the identity of the node that is participating
+/// in the network.
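+///
+/// A sparse identity can be created from just a public key and filled in
+/// later as the node volunteers more information, for example:
+///
+/// ```ignore
+/// // `public_key` is assumed to be a BLSPubKey obtained elsewhere.
+/// let identity = NodeIdentity::from_public_key(public_key);
+/// ```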
+#[derive(Clone, PartialEq, Debug, Serialize, Deserialize)]
+pub struct NodeIdentity {
+    public_key: BLSPubKey,
+    name: String,
+    wallet_address: String,
+    ip_addresses: Vec<IpAddr>,
+    company: String,
+    location: Option<LocationDetails>,
+    operating_system: String,
+    node_type: String,
+    network_type: String,
+}
+
+impl NodeIdentity {
+    pub fn new(
+        public_key: BLSPubKey,
+        name: String,
+        wallet_address: String,
+        ip_addresses: Vec<IpAddr>,
+        company: String,
+        location: Option<LocationDetails>,
+        operating_system: String,
+        node_type: String,
+        network_type: String,
+    ) -> Self {
+        Self {
+            public_key,
+            name,
+            wallet_address,
+            ip_addresses,
+            company,
+            location,
+            operating_system,
+            node_type,
+            network_type,
+        }
+    }
+
+    pub fn from_public_key(public_key: BLSPubKey) -> Self {
+        Self {
+            public_key,
+            name: String::new(),
+            wallet_address: String::new(),
+            ip_addresses: vec![],
+            company: String::new(),
+            location: None,
+            operating_system: String::new(),
+            node_type: String::new(),
+            network_type: String::new(),
+        }
+    }
+}
+
+/// [DataState] represents the state of the data that is being stored within
+/// the service.
+pub struct DataState {
+    pub latest_blocks: CircularBuffer<MAX_HISTORY, BlockDetail<SeqTypes>>,
+    pub latest_voters: CircularBuffer<MAX_HISTORY, BitVec>,
+    pub stake_table: StakeTable<BLSPubKey, StateVerKey, CircuitField>,
+    // Do we need any other data at the moment?
+    pub node_identity: Vec<(BLSPubKey, NodeIdentity)>,
+}
+
+/// [create_block_detail_from_leaf] is a helper function that will build a
+/// [BlockDetail] from the reference to [Leaf].
+fn create_block_detail_from_leaf(leaf: &Leaf<SeqTypes>) -> BlockDetail<SeqTypes> {
+    let block_header = leaf.block_header();
+    let block_payload = leaf.block_payload().unwrap_or(Payload::empty().0);
+
+    BlockDetail::<SeqTypes> {
+        hash: block_header.commitment(),
+        height: block_header.height,
+        time: Timestamp(
+            OffsetDateTime::from_unix_timestamp(block_header.timestamp as i64)
+                .unwrap_or(OffsetDateTime::UNIX_EPOCH),
+        ),
+        proposer_id: block_header.proposer_id(),
+        num_transactions: block_payload.num_transactions(block_header.metadata()) as u64,
+        block_reward: vec![block_header.fee_info_balance().into()],
+        fee_recipient: block_header.fee_info_account(),
+        size: block_payload
+            .transactions(block_header.metadata())
+            .fold(0, |acc, tx| acc + tx.payload().len() as u64),
+    }
+}
+
+fn process_incoming_leaf(
+    leaf: Leaf<SeqTypes>,
+    data_state: Arc<RwLock<DataState>>,
+    block_sender: Sender<BlockDetail<SeqTypes>>,
+) -> Result<(), ()>
+where
+    Header: BlockHeader<SeqTypes> + QueryableHeader<SeqTypes> + ExplorerHeader<SeqTypes>,
+    Payload: BlockPayload,
+{
+    let block_detail = create_block_detail_from_leaf(&leaf);
+    let block_detail_copy = create_block_detail_from_leaf(&leaf);
+
+    let certificate = leaf.justify_qc();
+    let signatures = &certificate.signatures;
+
+    // Let's take a look at the quorum certificate signatures.
+    // It looks like all of these blocks are being decided by the
+    // same Quorum Certificate.
+
+    // Where's the stake table?
+    let signatures = signatures.as_ref();
+
+    // Let's determine the voter participants in the Quorum Certificate.
+
+    // We shouldn't ever have a BitVec that is empty, with the possible
+    // exception of the genesis block.
+    let stake_table_voters_bit_vec = signatures.map_or(Default::default(), |sig| sig.1.clone());
+
+    // This BitVec should be in the same order as the Stake Table.
+    // The StakeTable will be able to change its order between epochs,
+    // which means that its order can change between blocks.
+    // However, the BitVec is a really nice size for storing this
+    // information.
+    // We should be able to remap the BitVec order from the StakeTable
+    // order to our installed order representation. This should allow us to
+    // still store a BitVec while preserving our own ordering of the voters.
+    // We will need to recompute these BitVecs if the node information that
+    // is stored shrinks instead of growing.
+
+    let mut data_state_write_lock_guard = match data_state.write() {
+        Ok(guard) => guard,
+        Err(_) => {
+            // This lock is poisoned, and we won't ever be able to
+            // acquire it. So we should just exit here.
+            return Err(());
+        }
+    };
+
+    let stake_table = &data_state_write_lock_guard.stake_table;
+    let stake_table_entries_vec = stake_table
+        .try_iter(SnapshotVersion::LastEpochStart)
+        .map_or(vec![], |into_iter| into_iter.collect::<Vec<_>>());
+
+    // We have a BitVec of voters who signed the QC.
+    // We can use this to determine the weight of the QC
+    let stake_table_entry_voter_participation_and_entries_pairs =
+        zip(stake_table_voters_bit_vec, stake_table_entries_vec);
+    let stake_table_keys_that_voted = stake_table_entry_voter_participation_and_entries_pairs
+        .filter(|(bit_ref, _)| *bit_ref)
+        .map(|(_, entry)| {
+            // This is the entry we care about. In this case, we just want
+            // to determine who voted for this Leaf.
+            let (key, _, _): (BLSPubKey, _, _) = entry;
+            key
+        });
+
+    let voters_set: HashSet<BLSPubKey> = stake_table_keys_that_voted.collect();
+
+    let voters_bitvec = data_state_write_lock_guard.node_identity.iter().fold(
+        BitVec::with_capacity(data_state_write_lock_guard.node_identity.len()),
+        |mut acc, key| {
+            if voters_set.contains(&key.0) {
+                acc.push(true);
+            } else {
+                acc.push(false);
+            }
+            acc
+        },
+    );
+
+    data_state_write_lock_guard
+        .latest_blocks
+        .push_back(block_detail);
+    data_state_write_lock_guard
+        .latest_voters
+        .push_back(voters_bitvec);
+
+    drop(data_state_write_lock_guard);
+
+    if let Err(_) = block_sender.send(block_detail_copy) {
+        // We have an error that prevents us from continuing
+        return Err(());
+    }
+
+    Ok(())
+}
+
+/// [process_leaf_stream] allows for the consumption of a [Stream] when
+/// attempting to process new incoming [Leaf]s.
+pub async fn process_leaf_stream<S>(
+    mut stream: S,
+    data_state: Arc<RwLock<DataState>>,
+    block_sender: Sender<BlockDetail<SeqTypes>>,
+) where
+    S: Stream<Item = Leaf<SeqTypes>> + Unpin,
+    Header: BlockHeader<SeqTypes> + QueryableHeader<SeqTypes> + ExplorerHeader<SeqTypes>,
+    Payload: BlockPayload,
+{
+    while let Some(leaf) = stream.next().await {
+        if let Err(_) = process_incoming_leaf(leaf, data_state.clone(), block_sender.clone()) {
+            // We have an error that prevents us from continuing
+            break;
+        }
+    }
+}
+
+/// [process_leaf_thread] allows for the consumption of a [Receiver] when
+/// attempting to process new incoming [Leaf]s.
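+///
+/// A spawning sketch (the receiver, state, and sender here are assumed to
+/// be constructed by the caller; the names are illustrative):
+///
+/// ```ignore
+/// let handle = std::thread::spawn(move || {
+///     process_leaf_thread(leaf_receiver, data_state.clone(), block_sender)
+/// });
+/// ```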
+pub fn process_leaf_thread(
+    receiver: Receiver<Leaf<SeqTypes>>,
+    data_state: Arc<RwLock<DataState>>,
+    block_sender: Sender<BlockDetail<SeqTypes>>,
+) where
+    Header: BlockHeader<SeqTypes> + QueryableHeader<SeqTypes> + ExplorerHeader<SeqTypes>,
+    Payload: BlockPayload,
+{
+    while let Ok(leaf) = receiver.recv() {
+        if let Err(_) = process_incoming_leaf(leaf, data_state.clone(), block_sender.clone()) {
+            // We have an error that prevents us from continuing
+            break;
+        }
+    }
+}
diff --git a/node-metrics/src/service/mod.rs b/node-metrics/src/service/mod.rs
new file mode 100644
index 000000000..90b95b953
--- /dev/null
+++ b/node-metrics/src/service/mod.rs
@@ -0,0 +1,6 @@
+pub mod client_id;
+pub mod client_message;
+pub mod client_state;
+pub mod data_state;
+pub mod node_type;
+pub mod server_message;
diff --git a/node-metrics/src/service/node_type/mod.rs b/node-metrics/src/service/node_type/mod.rs
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/node-metrics/src/service/node_type/mod.rs
@@ -0,0 +1 @@
+
diff --git a/node-metrics/src/service/server_message/mod.rs b/node-metrics/src/service/server_message/mod.rs
new file mode 100644
index 000000000..23fe10d49
--- /dev/null
+++ b/node-metrics/src/service/server_message/mod.rs
@@ -0,0 +1,32 @@
+use super::client_id::ClientId;
+use hotshot_query_service::explorer::{BlockDetail, ExplorerHistograms};
+use sequencer::SeqTypes;
+use serde::{Deserialize, Serialize};
+
+/// [ServerMessage] represents the messages that the server can send to the
+/// client for a response.
+#[derive(Debug, Serialize, Deserialize)]
+pub enum ServerMessage {
+    /// This allows the client to know what client_id they have been assigned
+    YouAre(ClientId),
+
+    /// LatestBlock is a message that is meant to show the most recent block
+    /// that has arrived.
+    LatestBlock(BlockDetail<SeqTypes>),
+
+    /// LatestNodeIdentity is a message that is meant to show the most recent
+    /// node identity that has arrived.
+    LatestNodeIdentity,
+
+    /// BlocksSnapshot is a message that is sent in response to a request for
+    /// the snapshot of block information that is available.
+    BlocksSnapshot(Vec<BlockDetail<SeqTypes>>),
+
+    /// NodeIdentitySnapshot is a message that is sent in response to a request
+    /// for the snapshot of the current node identity information.
+    NodeIdentitySnapshot,
+
+    /// HistogramSnapshot is a message that is sent in response to a request
+    /// for the snapshot of the current histogram information.
+    HistogramSnapshot(ExplorerHistograms),
+}
diff --git a/node-metrics/src/test/mod.rs b/node-metrics/src/test/mod.rs
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/node-metrics/src/test/mod.rs
@@ -0,0 +1 @@
+
From 86f404940b5b3ef8dcfa16390a4cdaeca8d356a8 Mon Sep 17 00:00:00 2001
From: Theodore Schnepper
Date: Tue, 2 Jul 2024 07:13:10 -0600
Subject: [PATCH 07/72] Implement PartialEq for ServerMessage and ClientMessage

Remove bad test from main

---
 node-metrics/src/main.rs                       | 14 --------------
 node-metrics/src/service/client_message/mod.rs | 17 +++++++++++++++++
 node-metrics/src/service/server_message/mod.rs | 14 ++++++++++++++
 3 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/node-metrics/src/main.rs b/node-metrics/src/main.rs
index 26549a333..d70f090fc 100644
--- a/node-metrics/src/main.rs
+++ b/node-metrics/src/main.rs
@@ -48,18 +48,4 @@ async fn main() {
     // We have two separate states we want to maintain as much as possible.
     // The first is the Data State, which contains all of the recorded state
     // we want to keep track of and to be able to relay at a moment's notice.
     // The second is a state of the connected clients. This state should be
    // able to be read from and written to indirectly by the clients.
- - let state = Arc::new(RwLock::new(State {})); - - let mut app = App::<_, node_validator::v0::Error>::with_state(state); - let node_validator_api_v0 = node_validator::v0::define_api().expect("api to be defined"); - app.register_module("node-validator", node_validator_api_v0) - .expect("register module"); - - // Serve the app - - let url = format!("0.0.0.0:9000"); - app.serve(&url, SERVICE_VER_0_1) - .await - .expect("app to be served"); } diff --git a/node-metrics/src/service/client_message/mod.rs b/node-metrics/src/service/client_message/mod.rs index 353854b5e..39c70c15b 100644 --- a/node-metrics/src/service/client_message/mod.rs +++ b/node-metrics/src/service/client_message/mod.rs @@ -20,6 +20,23 @@ pub enum InternalClientMessage { RequestHistogramSnapshot(ClientId), } +impl PartialEq for InternalClientMessage { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Self::Connected(_), Self::Connected(_)) => true, + (Self::Disconnected(l0), Self::Disconnected(r0)) => l0 == r0, + (Self::SubscribeLatestBlock(l0), Self::SubscribeLatestBlock(r0)) => l0 == r0, + (Self::SubscribeNodeIdentity(l0), Self::SubscribeNodeIdentity(r0)) => l0 == r0, + (Self::RequestBlocksSnapshot(l0), Self::RequestBlocksSnapshot(r0)) => l0 == r0, + (Self::RequestNodeIdentitySnapshot(l0), Self::RequestNodeIdentitySnapshot(r0)) => { + l0 == r0 + } + (Self::RequestHistogramSnapshot(l0), Self::RequestHistogramSnapshot(r0)) => l0 == r0, + _ => false, + } + } +} + /// [ClientMessage] represents the messages that the client can send to the /// server for a request. /// diff --git a/node-metrics/src/service/server_message/mod.rs b/node-metrics/src/service/server_message/mod.rs index 23fe10d49..2fe8f41f0 100644 --- a/node-metrics/src/service/server_message/mod.rs +++ b/node-metrics/src/service/server_message/mod.rs @@ -30,3 +30,17 @@ pub enum ServerMessage { /// for the snapshot of the current histogram information. 
HistogramSnapshot(ExplorerHistograms), } + +impl PartialEq for ServerMessage { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Self::YouAre(l0), Self::YouAre(r0)) => l0 == r0, + (Self::LatestBlock(l0), Self::LatestBlock(r0)) => l0 == r0, + (Self::LatestNodeIdentity, Self::LatestNodeIdentity) => true, + (Self::BlocksSnapshot(l0), Self::BlocksSnapshot(r0)) => l0 == r0, + (Self::NodeIdentitySnapshot, Self::NodeIdentitySnapshot) => true, + (Self::HistogramSnapshot(_), Self::HistogramSnapshot(_)) => false, + _ => false, + } + } +} From 424bd92b593dc1f59a09352efcd8d9b9ae222e9a Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Sun, 7 Jul 2024 21:48:42 -0600 Subject: [PATCH 08/72] Expand implementation Add more types for Server and Client message connection Refactor thread based blocking processing to async runtime processing Add several tests to ensure correct behavior --- Cargo.lock | 79 +- node-metrics/Cargo.toml | 15 +- node-metrics/src/api/node_validator/v0/mod.rs | 307 ++- node-metrics/src/main.rs | 6 - node-metrics/src/service/client_id/mod.rs | 156 +- .../src/service/client_message/mod.rs | 217 +- node-metrics/src/service/client_state/mod.rs | 1785 ++++++++++++++--- .../service/data_state/location_details.rs | 95 + node-metrics/src/service/data_state/mod.rs | 311 ++- .../src/service/data_state/node_identity.rs | 232 +++ .../src/service/server_message/mod.rs | 37 +- 11 files changed, 2753 insertions(+), 487 deletions(-) create mode 100644 node-metrics/src/service/data_state/location_details.rs create mode 100644 node-metrics/src/service/data_state/node_identity.rs diff --git a/Cargo.lock b/Cargo.lock index 919c3636b..cbb42cd0b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1327,7 +1327,7 @@ dependencies = [ "surf", "surf-disco", "tagged-base64", - "tide-disco", + "tide-disco 0.8.0", "tracing", "url", "vbs", @@ -4122,7 +4122,7 @@ dependencies = [ "serde", "snafu 0.8.4", "tagged-base64", - "tide-disco", + "tide-disco 0.8.0", "toml", "vbs", ] @@ -4151,7 +4151,7 @@ dependencies = [ "snafu 0.8.4", "surf-disco", "tagged-base64", - "tide-disco", + "tide-disco 0.8.0", "tokio", "tracing", "vbs", @@ -4200,7 +4200,7 @@ dependencies = [ "serde", "snafu 0.8.4", "tagged-base64", - "tide-disco", + "tide-disco 0.8.0", "toml", "tracing", "vbs", @@ -4291,7 +4291,7 @@ dependencies = [ "serde_json", "surf-disco", "thiserror", - "tide-disco", + "tide-disco 0.8.0", "tokio", "toml", "tracing", @@ -4342,7 +4342,7 @@ dependencies = [ "surf-disco", "tagged-base64", "tempfile", - "tide-disco", + "tide-disco 0.8.0", "time 0.3.36", "tokio", "tokio-postgres", @@ -4419,7 +4419,7 @@ dependencies = [ "snafu 0.8.4", "surf-disco", "tagged-base64", - "tide-disco", + "tide-disco 0.8.0", "time 0.3.36", "toml", "tracing", @@ -4515,7 +4515,7 @@ dependencies = [ "sha3", "snafu 0.8.4", "tagged-base64", - "tide-disco", + "tide-disco 0.8.0", "tokio", "tracing", "url", @@ -6453,9 +6453,12 @@ dependencies = [ "hotshot-types", "sequencer", "serde", - "tide-disco", + "serde_json", + "surf-disco", + "tide-disco 0.9.0", "time 0.3.36", "toml", + "tracing", "vbs", ] @@ -8481,7 +8484,7 @@ dependencies = [ "tagged-base64", "tempfile", "thiserror", - "tide-disco", + "tide-disco 0.8.0", "time 0.3.36", "tokio-postgres", "toml", @@ -9354,7 +9357,7 @@ dependencies = [ "reqwest 0.12.5", "serde", "serde_json", - "tide-disco", + "tide-disco 0.8.0", "tracing", "vbs", ] @@ -9704,6 +9707,60 @@ dependencies = [ "vbs", ] +[[package]] +name = "tide-disco" +version = "0.9.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "01e81752cd71cc517973c6ff743919848d7c9890331f38c43abadfb49eede3a6" +dependencies = [ + "anyhow", + "async-h1", + "async-lock 3.4.0", + "async-std", + "async-trait", + "clap", + "config", + "derivative", + "derive_more", + "dirs", + "edit-distance", + "futures", + "futures-util", + "http 1.1.0", + "include_dir", + "itertools 0.12.1", + "lazy_static", + "libc", + "markdown", + "maud", + "parking_lot", + "pin-project", + "prometheus", + "reqwest 0.12.5", + "routefinder", + "semver 1.0.23", + "serde", + "serde_json", + "serde_with", + "shellexpand", + "signal-hook", + "signal-hook-async-std", + "snafu 0.8.3", + "strum", + "strum_macros", + "tagged-base64", + "tide", + "tide-websockets", + "toml", + "tracing", + "tracing-distributed", + "tracing-futures", + "tracing-log", + "tracing-subscriber 0.3.18", + "url", + "vbs", +] + [[package]] name = "tide-websockets" version = "0.4.0" diff --git a/node-metrics/Cargo.toml b/node-metrics/Cargo.toml index ae5f659fb..22fccbded 100644 --- a/node-metrics/Cargo.toml +++ b/node-metrics/Cargo.toml @@ -6,9 +6,7 @@ authors = { workspace = true } edition = { workspace = true } [features] -default = ["libp2p"] -testing = ["hotshot-testing"] -libp2p = [] +testing = ["sequencer/testing", "serde_json"] [dependencies] async-compatibility-layer = { workspace = true } @@ -17,14 +15,17 @@ bitvec = { workspace = true } circular-buffer = { workspace = true } futures = { workspace = true } hotshot-query-service = { workspace = true } -hotshot-types = { workspace = true } hotshot-stake-table = { workspace = true } -sequencer = { path = "../sequencer" } +hotshot-types = { workspace = true } +sequencer = { path = "../sequencer"} serde = { workspace = true } -tide-disco = { workspace = true } +serde_json = { version = "^1.0.113", optional = true } +surf-disco = { workspace = true } +tide-disco = { version = "0.9.0" } time = { workspace = true } +tracing = { workspace = true } toml = { workspace = true } vbs = { workspace = true } # Dependencies for feature `testing` -hotshot-testing = { workspace = true, optional = true } +hotshot-testing = { workspace = true, optional = true } diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs index 64ea78d0f..bcfd436d1 100644 --- a/node-metrics/src/api/node_validator/v0/mod.rs +++ b/node-metrics/src/api/node_validator/v0/mod.rs @@ -1,10 +1,12 @@ use crate::service::client_message::{ClientMessage, InternalClientMessage}; use crate::service::server_message::ServerMessage; -use futures::SinkExt; -use futures::{future::FutureExt, stream::StreamExt}; +use futures::future::Either; +use futures::{ + channel::mpsc::{self, Sender}, + FutureExt, SinkExt, StreamExt, +}; use serde::{Deserialize, Serialize}; use std::fmt; -use std::sync::mpsc::{self, Sender}; use tide_disco::socket::Connection; use tide_disco::{api::ApiError, Api}; use vbs::version::{StaticVersion, StaticVersionType, Version}; @@ -25,6 +27,7 @@ pub const BASE_VERSION: Version = VERSION_0_1; /// Specific type for version 0.1 pub type Version01 = StaticVersion; + // Static instance of the Version01 type pub const STATIC_VER_0_1: Version01 = StaticVersion {}; @@ -49,6 +52,16 @@ impl tide_disco::Error for Error { } } +impl surf_disco::Error for Error { + fn catch_all(_status: surf_disco::StatusCode, _msg: String) -> Self { + todo!() + } + + fn status(&self) -> surf_disco::StatusCode { + todo!() + } +} + #[derive(Debug)] pub enum LoadApiError { Toml(toml::de::Error), @@ 
-143,53 +156,114 @@ where
             "details",
             move |_req, socket: Connection<ServerMessage, ClientMessage, Error, Version01>, state| {
                 async move {
-                    let client_message_sender = state.sender();
-                    let (server_message_sender, server_message_receiver) = mpsc::channel();
+                    let mut socket_stream = socket.clone();
+                    let mut socket_sink = socket;
+
+                    let mut internal_client_message_sender = state.sender();
+                    let (server_message_sender, mut server_message_receiver) = mpsc::channel(32);
 
                     // Let's register ourselves with the Server
-                    if let Err(_) = client_message_sender
+                    if let Err(err) = internal_client_message_sender
                         .send(InternalClientMessage::Connected(server_message_sender))
+                        .await
                     {
-                        todo!();
+                        // This means that the client_message_sender is closed;
+                        // we need to exit the stream.
+                        tracing::info!(
+                            "client message sender is closed before first message: {}",
+                            err
+                        );
+                        return Ok(());
                     }
 
                     // We should receive a response from the server that identifies us
                     // uniquely.
-                    let client_id =
-                        if let Ok(ServerMessage::YouAre(client_id)) = server_message_receiver.recv() {
-                            client_id
-                        } else {
-                            todo!();
-                        };
-
-                    let (sink, mut stream) = socket.split();
-
-                    // Now we want to just auto-forward any server message to the client
-                    // in it's own thread, and we also want to aut forward any client
-                    // message to the server.
-                    let client_id = client_id.clone();
-                    let client_message_sender = client_message_sender.clone();
-                    let handle = async_std::task::spawn(async move {
-                        let mut sink = sink;
-                        while let Ok(message) = server_message_receiver.recv() {
-                            if let Err(_) = sink.send(&message).await {
-                                // we're closed at this point
-                                break;
+                    let client_id = if let Some(ServerMessage::YouAre(client_id)) =
+                        server_message_receiver.next().await
+                    {
+                        client_id
+                    } else {
+                        // The channel is closed, and this client should be removed;
+                        // we need to exit the stream.
+                        tracing::info!("server message receiver closed before first message");
+                        return Ok(());
+                    };
+
+                    // We create these futures outside of the loop so that no
+                    // message can be skipped between loop iterations.
+                    let mut next_client_message = socket_stream.next();
+                    let mut next_server_message = server_message_receiver.next();
+
+                    loop {
+                        match futures::future::select(next_client_message, next_server_message).await {
+                            Either::Left((client_request, remaining_server_message)) => {
+                                let client_request = if let Some(client_request) = client_request {
+                                    client_request
+                                } else {
+                                    // The client has disconnected, we need to exit the stream
+                                    tracing::info!("client message has disconnected");
+                                    break;
+                                };
+
+                                let client_request = if let Ok(client_request) = client_request {
+                                    client_request
+                                } else {
+                                    // This indicates a more specific error with
+                                    // the socket message. The error can take
+                                    // several forms, some of which may be
+                                    // recoverable, but we will treat it as
+                                    // unrecoverable for now.
+ break; + }; + + let internal_client_message = + client_request.to_internal_with_client_id(client_id); + if let Err(err) = internal_client_message_sender + .send(internal_client_message) + .await + { + // This means that the client_message_sender is closed + tracing::info!("client message sender is closed: {}", err); + break; + } + + // let's queue up the next client message to receive + next_client_message = socket_stream.next(); + next_server_message = remaining_server_message; + } + Either::Right((server_message, remaining_client_message)) => { + // Alright, we have a server message, we want to forward it + // to the down-stream client. + + let server_message = if let Some(server_message) = server_message { + server_message + } else { + // The server has disconnected, we need to exit the stream + break; + }; + + // We want to forward the message to the client + if let Err(err) = socket_sink.send(&server_message).await { + // This means that the socket is closed + tracing::info!("socket is closed: {}", err); + break; + } + + // let's queue up the next server message to receive + next_server_message = server_message_receiver.next(); + next_client_message = remaining_client_message; } - } - }); - - // Start forwarding message from the client - while let Some(Ok(request)) = stream.next().await { - let internal_client_message = - request.to_internal_with_client_id(client_id.clone()); - if let Err(_) = client_message_sender.send(internal_client_message) { - todo!(); } } - // wait for the spawned task to finish - handle.await; + // We don't actually care if this fails or not, as we're exiting + // this function anyway, and these Senders and Receivers will + // automatically be dropped. + _ = internal_client_message_sender + .send(InternalClientMessage::Disconnected(client_id)) + .await; Ok(()) } @@ -201,21 +275,28 @@ where #[cfg(test)] mod tests { - use super::StateClientMessageSender; - use crate::service::client_message::InternalClientMessage; - use std::sync::mpsc::{self, Receiver, Sender}; - - struct TestState( - pub(crate) Sender, - pub(crate) Receiver, - ); - - impl TestState { - fn new() -> Self { - let (sender, receiver) = mpsc::channel(); - TestState(sender, receiver) - } - } + use super::{Error, StateClientMessageSender, Version01, STATIC_VER_0_1}; + use crate::service::{ + client_id::ClientId, + client_message::InternalClientMessage, + client_state::{ + process_distribute_block_detail_handling_stream, + process_distribute_node_identity_handling_stream, + process_distribute_voters_handling_stream, process_internal_client_message_stream, + ClientThreadState, + }, + data_state::{process_leaf_stream, DataState}, + }; + use async_std::sync::RwLock; + use futures::{ + channel::mpsc::{self, Sender}, + SinkExt, StreamExt, + }; + use sequencer::Leaf; + use std::sync::Arc; + use tide_disco::App; + + struct TestState(Sender); impl StateClientMessageSender for TestState { fn sender(&self) -> Sender { @@ -223,18 +304,122 @@ mod tests { } } - unsafe impl Send for TestState {} - unsafe impl Sync for TestState {} + #[async_std::test] + async fn test_api_creation() { + let node_validator_api_result = super::define_api::(); - // Woo hoo - #[test] - fn test_api_creation() { - let api = super::define_api::(); - match api { - Ok(_) => {} + let node_validator_api = match node_validator_api_result { + Ok(api) => api, Err(e) => { panic!("Error: {:?}", e); } + }; + + let (sender, receiver) = mpsc::channel(32); + let mut app: App = App::with_state(TestState(sender)); + let register_module_result = 
app.register_module("node-validator", node_validator_api); + + if let Err(e) = register_module_result { + panic!("Error: {:?}", e); } + + let data_state = DataState::new( + Default::default(), + Default::default(), + Default::default(), + Default::default(), + ); + + let client_thread_state = ClientThreadState::new( + Default::default(), + Default::default(), + Default::default(), + Default::default(), + ClientId::from_count(1), + ); + + let data_state = Arc::new(RwLock::new(data_state)); + let client_thread_state = Arc::new(RwLock::new(client_thread_state)); + let (block_detail_sender, block_detail_receiver) = mpsc::channel(32); + let (leaf_sender, leaf_receiver) = mpsc::channel(32); + let (_node_identity_sender, node_identity_receiver) = mpsc::channel(32); + let (_voters_sender, voters_receiver) = mpsc::channel(32); + + let _process_internal_client_message_handle = + async_std::task::spawn(process_internal_client_message_stream( + receiver, + data_state.clone(), + client_thread_state.clone(), + )); + + let _process_distribute_block_detail_handle = + async_std::task::spawn(process_distribute_block_detail_handling_stream( + client_thread_state.clone(), + block_detail_receiver, + )); + + let _process_distribute_node_identity_handle = + async_std::task::spawn(process_distribute_node_identity_handling_stream( + client_thread_state.clone(), + node_identity_receiver, + )); + + let _process_distribute_voters_handle = async_std::task::spawn( + process_distribute_voters_handling_stream(client_thread_state.clone(), voters_receiver), + ); + + let _process_leaf_stream_handle = async_std::task::spawn(process_leaf_stream( + leaf_receiver, + data_state.clone(), + block_detail_sender, + )); + + let _leaf_retriever_handle = async_std::task::spawn(async move { + // Alright, let's get some leaves, bro + + let client: surf_disco::Client = surf_disco::Client::new( + "https://query.cappuccino.testnet.espresso.network/v0" + .parse() + .unwrap(), + ); + + let block_height_result = client.get("status/block-height").send().await; + let block_height: u64 = if let Ok(block_height) = block_height_result { + block_height + } else { + tracing::info!("block height request failed"); + return; + }; + + let start_block_height = block_height.saturating_sub(50); + + let mut leaf_sender = leaf_sender; + let mut leaves = client + .socket(&format!( + "availability/stream/leaves/{}", + start_block_height + )) + .subscribe::() + .await + .unwrap(); + + loop { + let leaf_result = leaves.next().await; + let leaf = if let Some(Ok(leaf)) = leaf_result { + leaf + } else { + tracing::info!("leaf stream closed"); + break; + }; + + let leaf_send_result = leaf_sender.send(leaf).await; + if let Err(err) = leaf_send_result { + tracing::info!("leaf sender closed: {}", err); + break; + } + } + }); + + let _app_serve_result = app.serve("0.0.0.0:9000", STATIC_VER_0_1).await; } } diff --git a/node-metrics/src/main.rs b/node-metrics/src/main.rs index d70f090fc..7f1b5d3f3 100644 --- a/node-metrics/src/main.rs +++ b/node-metrics/src/main.rs @@ -1,7 +1,3 @@ -use std::sync::{Arc, RwLock}; - -use node_metrics::api::node_validator; -use tide_disco::App; use vbs::version::{StaticVersion, Version}; /// CONSTANT for protocol major version @@ -31,8 +27,6 @@ pub const SERVICE_VER_0_1: Version01 = StaticVersion {}; #[derive(Clone)] pub struct ClientDef; -struct State {} - /// ClientConnectionMessage is a message that indicates when a client is /// connecting or disconnecting from the service. 
This message is used /// to signify when the client arrives or leaves. diff --git a/node-metrics/src/service/client_id/mod.rs b/node-metrics/src/service/client_id/mod.rs index ac34be7f8..11353b6e5 100644 --- a/node-metrics/src/service/client_id/mod.rs +++ b/node-metrics/src/service/client_id/mod.rs @@ -1,12 +1,164 @@ use serde::{Deserialize, Serialize}; +use std::ops::{Add, AddAssign}; /// [ClientId] represents the unique identifier for a client that is connected /// to the server. +/// +/// Example: +/// ```rust +/// # use node_metrics::service::client_id::ClientId; +/// +/// let client_id = ClientId::from_count(1); +/// +/// # assert_eq!(ClientId::from_count(1), client_id); +/// let client_id_2 = client_id + 1; +/// +/// # assert_ne!(client_id, client_id_2); +/// +/// let mut client_id_3 = client_id; +/// client_id_3 += 1; +/// +/// # assert_eq!(client_id_2, client_id_3); +/// ``` #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] -pub struct ClientId(u128); +pub struct ClientId(u64); impl ClientId { - pub fn from_count(count: u128) -> Self { + pub fn from_count(count: u64) -> Self { ClientId(count) } } + +/// [Add] implements basic addition for [ClientId], which allows [u64]s to be +/// added to the [ClientId] for convenience. +/// +/// Example: +/// +/// ```rust +/// +/// # use node_metrics::service::client_id::ClientId; +/// +/// let client_id = ClientId::from_count(1); +/// let new_client_id = client_id + 1; +/// +/// # assert_eq!(ClientId::from_count(2), new_client_id); +/// # assert_ne!(client_id, new_client_id); +/// ``` +impl Add for ClientId { + type Output = Self; + + fn add(self, rhs: u64) -> Self::Output { + ClientId(self.0 + rhs) + } +} + +/// [AddAssign] implements basic addition for [ClientId], which allows [u64]s to +/// be added to the mutable [ClientId] for convenience. 
+/// +/// Example: +/// +/// ```rust +/// # use node_metrics::service::client_id::ClientId; +/// +/// let mut client_id = ClientId::from_count(1); +/// client_id += 1; +/// +/// # assert_eq!(ClientId::from_count(2), client_id); +/// ``` +impl AddAssign for ClientId { + fn add_assign(&mut self, rhs: u64) { + self.0 += rhs; + } +} + +#[cfg(test)] +mod tests { + use super::ClientId; + + #[test] + fn test_client_id_debug() { + let client_id = ClientId::from_count(1); + assert_eq!(format!("{:?}", client_id), "ClientId(1)"); + } + + #[test] + #[allow(clippy::clone_on_copy)] + fn test_client_id_clone() { + let client_id = ClientId::from_count(1); + let cloned_client_id = client_id.clone(); + assert_eq!(client_id, cloned_client_id); + } + + #[test] + fn test_client_id_partial_eq() { + let client_id_1 = ClientId::from_count(1); + let client_id_2 = ClientId::from_count(2); + let client_id_3 = ClientId::from_count(1); + + assert_ne!(client_id_1, client_id_2); + assert_eq!(client_id_1, client_id_3); + } + + #[test] + fn test_client_id_eq() { + let client_id_1 = ClientId::from_count(1); + + client_id_1.assert_receiver_is_total_eq(); + } + + #[test] + fn test_hash() { + use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; + + let hash_1 = { + let client_id = ClientId::from_count(1); + let mut hasher = DefaultHasher::new(); + client_id.hash(&mut hasher); + hasher.finish() + }; + + let hash_2 = { + let client_id = ClientId::from_count(2); + let mut hasher = DefaultHasher::new(); + client_id.hash(&mut hasher); + hasher.finish() + }; + + let hash_3 = { + let client_id = ClientId::from_count(1); + let mut hasher = DefaultHasher::new(); + client_id.hash(&mut hasher); + hasher.finish() + }; + + assert_eq!(hash_1, hash_3); + assert_ne!(hash_1, hash_2); + assert_ne!(hash_2, hash_3); + } + + #[test] + fn test_add() { + let client_id = ClientId::from_count(1); + let new_client_id = client_id + 1; + assert_eq!(new_client_id, ClientId::from_count(2)); + } + + #[test] + fn test_add_assign() { + let mut client_id = ClientId::from_count(1); + client_id += 1; + assert_eq!(client_id, ClientId::from_count(2)); + } + + #[test] + #[cfg(feature = "testing")] + fn test_serialization() { + use serde_json; + let client_id = ClientId::from_count(1); + let serialized = serde_json::to_string(&client_id).unwrap(); + let deserialized: ClientId = serde_json::from_str(&serialized).unwrap(); + + assert_eq!(deserialized, client_id); + } +} diff --git a/node-metrics/src/service/client_message/mod.rs b/node-metrics/src/service/client_message/mod.rs index 39c70c15b..2ca97a79f 100644 --- a/node-metrics/src/service/client_message/mod.rs +++ b/node-metrics/src/service/client_message/mod.rs @@ -1,59 +1,69 @@ -use serde::{Deserialize, Serialize}; - use super::client_id::ClientId; use super::server_message::ServerMessage; -use std::sync::mpsc::Sender; +use futures::channel::mpsc::Sender; +use serde::{Deserialize, Serialize}; + +/// [ClientMessage] represents the messages that the client can send to the +/// server for a request. +/// +#[derive(Debug, PartialEq, Serialize, Deserialize)] +pub enum ClientMessage { + SubscribeLatestBlock, + SubscribeNodeIdentity, + SubscribeVoters, + + RequestBlocksSnapshot, + RequestNodeIdentitySnapshot, + RequestHistogramSnapshot, + RequestVotersSnapshot, +} /// InternalClientMessage represents the message requests that the client can /// send to the server. 
These messages are requests that the client can send
 /// in order for the server to send back responses that correspond to the
 /// request.
+#[derive(Debug)]
 pub enum InternalClientMessage {
     Connected(Sender<ServerMessage>),
     Disconnected(ClientId),

     SubscribeLatestBlock(ClientId),
     SubscribeNodeIdentity(ClientId),
+    SubscribeVoters(ClientId),

     RequestBlocksSnapshot(ClientId),
     RequestNodeIdentitySnapshot(ClientId),
     RequestHistogramSnapshot(ClientId),
+    RequestVotersSnapshot(ClientId),
 }

 impl PartialEq for InternalClientMessage {
     fn eq(&self, other: &Self) -> bool {
         match (self, other) {
+            // We don't care about the [Sender] here, as it cannot be
+            // compared.
             (Self::Connected(_), Self::Connected(_)) => true,
-            (Self::Disconnected(l0), Self::Disconnected(r0)) => l0 == r0,
-            (Self::SubscribeLatestBlock(l0), Self::SubscribeLatestBlock(r0)) => l0 == r0,
-            (Self::SubscribeNodeIdentity(l0), Self::SubscribeNodeIdentity(r0)) => l0 == r0,
-            (Self::RequestBlocksSnapshot(l0), Self::RequestBlocksSnapshot(r0)) => l0 == r0,
-            (Self::RequestNodeIdentitySnapshot(l0), Self::RequestNodeIdentitySnapshot(r0)) => {
-                l0 == r0
+            (Self::Disconnected(lhs), Self::Disconnected(rhs)) => lhs == rhs,
+            (Self::SubscribeLatestBlock(lhs), Self::SubscribeLatestBlock(rhs)) => lhs == rhs,
+            (Self::SubscribeNodeIdentity(lhs), Self::SubscribeNodeIdentity(rhs)) => lhs == rhs,
+            (Self::SubscribeVoters(lhs), Self::SubscribeVoters(rhs)) => lhs == rhs,
+            (Self::RequestBlocksSnapshot(lhs), Self::RequestBlocksSnapshot(rhs)) => lhs == rhs,
+            (Self::RequestNodeIdentitySnapshot(lhs), Self::RequestNodeIdentitySnapshot(rhs)) => {
+                lhs == rhs
+            }
+            (Self::RequestHistogramSnapshot(lhs), Self::RequestHistogramSnapshot(rhs)) => {
+                lhs == rhs
             }
-            (Self::RequestHistogramSnapshot(l0), Self::RequestHistogramSnapshot(r0)) => l0 == r0,
+            (Self::RequestVotersSnapshot(lhs), Self::RequestVotersSnapshot(rhs)) => lhs == rhs,
             _ => false,
         }
     }
 }

-/// [ClientMessage] represents the messages that the client can send to the
-/// server for a request.
-///
-#[derive(Debug, PartialEq, Serialize, Deserialize)]
-pub enum ClientMessage {
-    SubscribeLatestBlock,
-    SubscribeNodeIdentity,
-
-    RequestBlocksSnapshot,
-    RequestNodeIdentitySnapshot,
-    RequestHistogramSnapshot,
-}
-
 impl ClientMessage {
     /// [to_internal_with_client_id] converts the [ClientMessage] into an
     /// [InternalClientMessage] with the given [ClientId].
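+    ///
+    /// Example (a minimal sketch; the module path for [ClientMessage] is an
+    /// assumption, mirroring the [ClientId] doctest examples elsewhere in
+    /// this patch):
+    ///
+    /// ```rust
+    /// # use node_metrics::service::client_id::ClientId;
+    /// # use node_metrics::service::client_message::{ClientMessage, InternalClientMessage};
+    /// let client_id = ClientId::from_count(1);
+    /// let internal = ClientMessage::SubscribeLatestBlock.to_internal_with_client_id(client_id);
+    ///
+    /// # assert_eq!(internal, InternalClientMessage::SubscribeLatestBlock(client_id));
+    /// ```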
-    pub fn to_internal_with_client_id(self, client_id: ClientId) -> InternalClientMessage {
+    pub fn to_internal_with_client_id(&self, client_id: ClientId) -> InternalClientMessage {
         match self {
             ClientMessage::SubscribeLatestBlock => {
                 InternalClientMessage::SubscribeLatestBlock(client_id)
@@ -61,6 +71,7 @@ impl ClientMessage {
             ClientMessage::SubscribeNodeIdentity => {
                 InternalClientMessage::SubscribeNodeIdentity(client_id)
             }
+            ClientMessage::SubscribeVoters => InternalClientMessage::SubscribeVoters(client_id),
             ClientMessage::RequestBlocksSnapshot => {
                 InternalClientMessage::RequestBlocksSnapshot(client_id)
             }
@@ -70,6 +81,164 @@ impl ClientMessage {
             ClientMessage::RequestHistogramSnapshot => {
                 InternalClientMessage::RequestHistogramSnapshot(client_id)
             }
+            ClientMessage::RequestVotersSnapshot => {
+                InternalClientMessage::RequestVotersSnapshot(client_id)
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::InternalClientMessage;
+    use super::*;
+    use std::iter::zip;
+
+    #[test]
+    fn test_client_message_partial_eq() {
+        let messages = [
+            ClientMessage::SubscribeLatestBlock,
+            ClientMessage::SubscribeNodeIdentity,
+            ClientMessage::SubscribeVoters,
+            ClientMessage::RequestBlocksSnapshot,
+            ClientMessage::RequestNodeIdentitySnapshot,
+            ClientMessage::RequestHistogramSnapshot,
+        ];
+
+        for (l, r) in zip(messages.iter(), messages.iter()) {
+            assert_eq!(l, r);
+        }
+
+        for i in 1..messages.len() {
+            for (l, r) in zip(
+                messages.iter(),
+                messages.iter().skip(i).chain(messages.iter().take(i)),
+            ) {
+                assert_ne!(l, r);
+            }
+        }
+    }
+
+    #[test]
+    fn test_client_message_debug() {
+        let messages = [
+            ClientMessage::SubscribeLatestBlock,
+            ClientMessage::SubscribeNodeIdentity,
+            ClientMessage::SubscribeVoters,
+            ClientMessage::RequestBlocksSnapshot,
+            ClientMessage::RequestNodeIdentitySnapshot,
+            ClientMessage::RequestHistogramSnapshot,
+        ];
+        // The derived [Debug] output is just the variant name.
+        let expected = [
+            "SubscribeLatestBlock",
+            "SubscribeNodeIdentity",
+            "SubscribeVoters",
+            "RequestBlocksSnapshot",
+            "RequestNodeIdentitySnapshot",
+            "RequestHistogramSnapshot",
+        ];
+
+        for (message, expected) in zip(messages.iter(), expected.iter()) {
+            assert_eq!(format!("{:?}", message), *expected);
+        }
+    }
+
+    #[test]
+    #[cfg(feature = "testing")]
+    fn test_client_message_serialize() {
+        use serde_json;
+
+        let messages = [
+            ClientMessage::SubscribeLatestBlock,
+            ClientMessage::SubscribeNodeIdentity,
+            ClientMessage::SubscribeVoters,
+            ClientMessage::RequestBlocksSnapshot,
+            ClientMessage::RequestNodeIdentitySnapshot,
+            ClientMessage::RequestHistogramSnapshot,
+        ];
+
+        for message in messages.iter() {
+            let serialized = serde_json::to_string(message).unwrap();
+            let deserialized: ClientMessage = serde_json::from_str(&serialized).unwrap();
+            assert_eq!(*message, deserialized);
+        }
+    }
+
+    #[test]
+    fn test_client_message_to_internal_with_client_id() {
+        let messages = [
+            ClientMessage::SubscribeLatestBlock,
+            ClientMessage::SubscribeNodeIdentity,
+            ClientMessage::SubscribeVoters,
+            ClientMessage::RequestBlocksSnapshot,
+            ClientMessage::RequestNodeIdentitySnapshot,
+            ClientMessage::RequestHistogramSnapshot,
+        ];
+
+        for message in messages {
+            for i in 0..10 {
+                let client_id = ClientId::from_count(i);
+                let internal_client_message = message.to_internal_with_client_id(client_id);
+                match internal_client_message {
+                    InternalClientMessage::SubscribeLatestBlock(id) => {
+                        assert_eq!(id, client_id);
+                    }
+                    InternalClientMessage::SubscribeNodeIdentity(id) => {
+                        assert_eq!(id, client_id);
+                    }
+                    InternalClientMessage::SubscribeVoters(id) => {
+                        assert_eq!(id, client_id);
+                    }
+                    InternalClientMessage::RequestBlocksSnapshot(id) => {
+                        assert_eq!(id, client_id);
+                    }
+                    InternalClientMessage::RequestNodeIdentitySnapshot(id) => {
+                        assert_eq!(id, client_id);
+                    }
InternalClientMessage::RequestHistogramSnapshot(id) => { + assert_eq!(id, client_id); + } + _ => panic!("Unexpected InternalClientMessage"), + } + } + } + } + + #[test] + fn test_internal_client_message_partial_eq() { + let (sender, _) = futures::channel::mpsc::channel(1); + let messages = [ + InternalClientMessage::Connected(sender), + InternalClientMessage::Disconnected(ClientId::from_count(1)), + InternalClientMessage::SubscribeLatestBlock(ClientId::from_count(1)), + InternalClientMessage::SubscribeNodeIdentity(ClientId::from_count(1)), + InternalClientMessage::SubscribeVoters(ClientId::from_count(1)), + InternalClientMessage::RequestBlocksSnapshot(ClientId::from_count(1)), + InternalClientMessage::RequestNodeIdentitySnapshot(ClientId::from_count(1)), + InternalClientMessage::RequestHistogramSnapshot(ClientId::from_count(1)), + ]; + + for (l, r) in zip(messages.iter(), messages.iter()) { + assert_eq!(l, r); + } + + for i in 1..messages.len() { + for (l, r) in zip( + messages.iter(), + messages.iter().skip(i).chain(messages.iter().take(i)), + ) { + assert_ne!(l, r); + } + } + + for j in 2..12 { + let iter_messages = [ + InternalClientMessage::Disconnected(ClientId::from_count(j)), + InternalClientMessage::SubscribeLatestBlock(ClientId::from_count(j)), + InternalClientMessage::SubscribeNodeIdentity(ClientId::from_count(j)), + InternalClientMessage::SubscribeVoters(ClientId::from_count(j)), + InternalClientMessage::RequestBlocksSnapshot(ClientId::from_count(j)), + InternalClientMessage::RequestNodeIdentitySnapshot(ClientId::from_count(j)), + InternalClientMessage::RequestHistogramSnapshot(ClientId::from_count(j)), + ]; + + // We skip the first message, as we don't want to include the + // Connected message. + for (l, r) in zip(messages.iter().skip(1), iter_messages.iter()) { + assert_ne!(l, r); + } } } } diff --git a/node-metrics/src/service/client_state/mod.rs b/node-metrics/src/service/client_state/mod.rs index 8b195184e..f45711e74 100644 --- a/node-metrics/src/service/client_state/mod.rs +++ b/node-metrics/src/service/client_state/mod.rs @@ -1,17 +1,20 @@ use super::{ - client_id::ClientId, client_message::InternalClientMessage, data_state::DataState, + client_id::ClientId, + client_message::InternalClientMessage, + data_state::{DataState, NodeIdentity}, server_message::ServerMessage, }; -use futures::{Stream, StreamExt}; +use async_std::sync::{RwLock, RwLockWriteGuard}; +use bitvec::vec::BitVec; +use futures::{ + channel::mpsc::{SendError, Sender}, + SinkExt, Stream, StreamExt, +}; use hotshot_query_service::explorer::{BlockDetail, ExplorerHistograms}; use sequencer::SeqTypes; -use std::sync::RwLockWriteGuard; use std::{ collections::{HashMap, HashSet}, - sync::{ - mpsc::{Receiver, SendError, Sender}, - Arc, RwLock, - }, + sync::Arc, }; /// ClientState represents the service state of the connected clients. @@ -29,9 +32,12 @@ impl ClientState { Self { client_id, sender } } - /// Send a message to the client's consuming thread. 
-    pub fn send_message(&self, message: ServerMessage) -> Result<(), SendError<ServerMessage>> {
-        self.sender.send(message)
+    pub fn client_id(&self) -> ClientId {
+        self.client_id
+    }
+
+    pub fn sender(&self) -> &Sender<ServerMessage> {
+        &self.sender
     }
 }
@@ -42,7 +48,26 @@
 pub struct ClientThreadState {
     clients: HashMap<ClientId, ClientState>,
     subscribed_latest_block: HashSet<ClientId>,
     subscribed_node_identity: HashSet<ClientId>,
-    connection_id_counter: u128,
+    subscribed_voters: HashSet<ClientId>,
+    connection_id_counter: ClientId,
+}
+
+impl ClientThreadState {
+    pub fn new(
+        clients: HashMap<ClientId, ClientState>,
+        subscribed_latest_block: HashSet<ClientId>,
+        subscribed_node_identity: HashSet<ClientId>,
+        subscribed_voters: HashSet<ClientId>,
+        connection_id_counter: ClientId,
+    ) -> Self {
+        Self {
+            clients,
+            subscribed_latest_block,
+            subscribed_node_identity,
+            subscribed_voters,
+            connection_id_counter,
+        }
+    }
 }

 /// [drop_client_client_thread_state_write_guard] is a utility function for
@@ -64,14 +89,11 @@ fn drop_client_client_thread_state_write_guard(
 /// [drop_client_no_lock_guard] is a utility function for cleaning up the [ClientThreadState]
 /// when a client is detected as disconnected.
-fn drop_client_no_lock_guard(
+async fn drop_client_no_lock_guard(
     client_id: &ClientId,
     client_thread_state: Arc<RwLock<ClientThreadState>>,
 ) -> Option<ClientState> {
-    let mut client_thread_state_write_lock_guard = match client_thread_state.write() {
-        Ok(lock) => lock,
-        Err(_) => return None,
-    };
+    let mut client_thread_state_write_lock_guard = client_thread_state.write().await;

     drop_client_client_thread_state_write_guard(
         client_id,
@@ -79,25 +101,46 @@
     )
 }

+/// [HandleConnectedError] represents the scope of errors that can be
+/// returned from the [handle_client_message_connected] function.
+#[derive(Debug)]
+pub enum HandleConnectedError {
+    ClientSendError(SendError),
+}
+
+impl std::fmt::Display for HandleConnectedError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            HandleConnectedError::ClientSendError(err) => {
+                write!(f, "handle connected error: client send error: {}", err)
+            }
+        }
+    }
+}
+
+impl std::error::Error for HandleConnectedError {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        match self {
+            HandleConnectedError::ClientSendError(err) => Some(err),
        }
    }
+}
+
 /// [handle_client_message_connected] is a function that processes the client
 /// message to connect a client to the service.
-pub fn handle_client_message_connected(
-    sender: Sender<ServerMessage>,
+pub async fn handle_client_message_connected(
+    mut sender: Sender<ServerMessage>,
     client_thread_state: Arc<RwLock<ClientThreadState>>,
-) -> Result<ClientId, ()> {
-    let mut client_thread_state_write_lock_guard = match client_thread_state.write() {
-        Ok(lock) => lock,
-        Err(_) => return Err(()),
-    };
+) -> Result<ClientId, HandleConnectedError> {
+    let mut client_thread_state_write_lock_guard = client_thread_state.write().await;

     client_thread_state_write_lock_guard.connection_id_counter += 1;
-    let client_id =
-        ClientId::from_count(client_thread_state_write_lock_guard.connection_id_counter);
+    let client_id = client_thread_state_write_lock_guard.connection_id_counter;

     client_thread_state_write_lock_guard.clients.insert(
-        client_id.clone(),
+        client_id,
         ClientState {
-            client_id: client_id.clone(),
+            client_id,
             sender: sender.clone(),
         },
     );
@@ -106,10 +149,10 @@
     drop(client_thread_state_write_lock_guard);

     // Send the client their new id.
-    if let Err(_) = sender.send(ServerMessage::YouAre(client_id.clone())) {
+    if let Err(err) = sender.send(ServerMessage::YouAre(client_id)).await {
         // We need to drop the client now.
- drop_client_no_lock_guard(&client_id, client_thread_state.clone()); - return Err(()); + drop_client_no_lock_guard(&client_id, client_thread_state.clone()).await; + return Err(HandleConnectedError::ClientSendError(err)); } Ok(client_id) @@ -117,26 +160,22 @@ pub fn handle_client_message_connected( /// [handle_client_message_disconnected] is a function that processes the client /// message to disconnect a client from the service. -pub fn handle_client_message_disconnected( +pub async fn handle_client_message_disconnected( client_id: ClientId, client_thread_state: Arc>, -) -> Result<(), ()> { +) { // We might receive an implicit disconnect when attempting to // send a message, as the receiving channel might be closed. - drop_client_no_lock_guard(&client_id, client_thread_state.clone()); - Ok(()) + drop_client_no_lock_guard(&client_id, client_thread_state.clone()).await; } /// [handle_client_message_subscribe_latest_block] is a function that processes /// the client message to subscribe to the latest block stream. -pub fn handle_client_message_subscribe_latest_block( +pub async fn handle_client_message_subscribe_latest_block( client_id: ClientId, client_thread_state: Arc>, -) -> Result<(), ()> { - let mut client_thread_state_write_lock_guard = match client_thread_state.write() { - Ok(lock) => lock, - Err(_) => return Err(()), - }; +) { + let mut client_thread_state_write_lock_guard = client_thread_state.write().await; client_thread_state_write_lock_guard .subscribed_latest_block @@ -144,19 +183,15 @@ pub fn handle_client_message_subscribe_latest_block( // Explicitly unlock drop(client_thread_state_write_lock_guard); - Ok(()) } /// [handle_client_message_subscribe_node_identity] is a function that processes /// the client message to subscribe to the node identity stream. -pub fn handle_client_message_subscribe_node_identity( +pub async fn handle_client_message_subscribe_node_identity( client_id: ClientId, client_thread_state: Arc>, -) -> Result<(), ()> { - let mut client_thread_state_write_lock_guard = match client_thread_state.write() { - Ok(lock) => lock, - Err(_) => return Err(()), - }; +) { + let mut client_thread_state_write_lock_guard = client_thread_state.write().await; client_thread_state_write_lock_guard .subscribed_node_identity @@ -164,32 +199,65 @@ pub fn handle_client_message_subscribe_node_identity( // Explicitly unlock drop(client_thread_state_write_lock_guard); - Ok(()) +} + +/// [handle_client_message_subscribe_voters] is a function that processes +/// the client message to subscribe to the voters bitvecs. +pub async fn handle_client_message_subscribe_voters( + client_id: ClientId, + client_thread_state: Arc>, +) { + let mut client_thread_state_write_lock_guard = client_thread_state.write().await; + + client_thread_state_write_lock_guard + .subscribed_voters + .insert(client_id); + + // Explicitly unlock + drop(client_thread_state_write_lock_guard); +} + +/// [HandleRequestBlocksSnapshotsError] represents the scope of errors that can +/// be returned from the [handle_client_message_request_blocks_snapshot] function. 
+#[derive(Debug)]
+pub enum HandleRequestBlocksSnapshotsError {
+    ClientSendError(SendError),
+}
+
+impl std::fmt::Display for HandleRequestBlocksSnapshotsError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            HandleRequestBlocksSnapshotsError::ClientSendError(err) => {
+                write!(
+                    f,
+                    "handle request blocks snapshot error: client send error: {}",
+                    err
+                )
+            }
+        }
+    }
+}
+
+impl std::error::Error for HandleRequestBlocksSnapshotsError {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        match self {
+            HandleRequestBlocksSnapshotsError::ClientSendError(err) => Some(err),
+        }
+    }
 }

 /// [handle_client_message_request_blocks_snapshot] is a function that processes
 /// the client message request for a blocks snapshot.
-pub fn handle_client_message_request_blocks_snapshot(
+pub async fn handle_client_message_request_blocks_snapshot(
     client_id: ClientId,
     data_state: Arc<RwLock<DataState>>,
     client_thread_state: Arc<RwLock<ClientThreadState>>,
-) -> Result<(), ()> {
-    let client_thread_state_read_lock_guard = match client_thread_state.read() {
-        Ok(lock) => lock,
-        Err(_) => return Err(()),
-    };
-
-    let data_state_read_lock_guard = match data_state.read() {
-        Ok(lock) => lock,
-        Err(_) => {
-            drop(client_thread_state_read_lock_guard);
-            return Err(());
-        }
-    };
+) -> Result<(), HandleRequestBlocksSnapshotsError> {
+    let (client_thread_state_read_lock_guard, data_state_read_lock_guard) =
+        futures::join!(client_thread_state.read(), data_state.read());

     let latest_blocks = data_state_read_lock_guard
-        .latest_blocks
-        .iter()
+        .latest_blocks()
         .map(|block| BlockDetail {
             hash: block.hash,
             proposer_id: block.proposer_id,
             height: block.height,
             size: block.size,
             time: block.time,
             num_transactions: block.num_transactions,
             fee_recipient: block.fee_recipient,
             block_reward: block.block_reward.clone(),
         })
         .collect::<Vec<BlockDetail<SeqTypes>>>();

     if let Some(client) = client_thread_state_read_lock_guard.clients.get(&client_id) {
-        if let Err(_) = client.send_message(ServerMessage::BlocksSnapshot(latest_blocks)) {
-            drop_client_no_lock_guard(&client_id, client_thread_state.clone());
+        let mut sender = client.sender.clone();
+        if let Err(err) = sender
+            .send(ServerMessage::BlocksSnapshot(Arc::new(latest_blocks)))
+            .await
+        {
+            drop_client_no_lock_guard(&client_id, client_thread_state.clone()).await;
+            return Err(HandleRequestBlocksSnapshotsError::ClientSendError(err));
         }
     }

-    drop(data_state_read_lock_guard);
-    drop(client_thread_state_read_lock_guard);
-
     Ok(())
 }

+/// [HandleRequestNodeIdentitySnapshotError] represents the scope of errors that
+/// can be returned from the [handle_client_message_request_node_identity_snapshot]
+/// function.
+#[derive(Debug)]
+pub enum HandleRequestNodeIdentitySnapshotError {
+    ClientSendError(SendError),
+}
+
+impl std::fmt::Display for HandleRequestNodeIdentitySnapshotError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            HandleRequestNodeIdentitySnapshotError::ClientSendError(err) => {
+                write!(
+                    f,
+                    "handle request node identity snapshot error: client send error: {}",
+                    err
+                )
+            }
+        }
+    }
+}
+
+impl std::error::Error for HandleRequestNodeIdentitySnapshotError {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        match self {
+            HandleRequestNodeIdentitySnapshotError::ClientSendError(err) => Some(err),
+        }
+    }
+}
+
 /// [handle_client_message_request_node_identity_snapshot] is a function that
 /// processes the client message request for a node identity snapshot.
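+///
+/// A minimal calling sketch (`no_run`; the module paths and the `Default`
+/// impl for `DataState` are assumptions based on the tests at the bottom of
+/// this module):
+///
+/// ```rust,no_run
+/// # use std::sync::Arc;
+/// # use async_std::sync::RwLock;
+/// # use node_metrics::service::client_id::ClientId;
+/// # use node_metrics::service::client_state::{
+/// #     handle_client_message_request_node_identity_snapshot, ClientThreadState,
+/// # };
+/// # use node_metrics::service::data_state::DataState;
+/// # async_std::task::block_on(async {
+/// let data_state = Arc::new(RwLock::new(DataState::default()));
+/// let client_thread_state = Arc::new(RwLock::new(ClientThreadState::new(
+///     Default::default(),
+///     Default::default(),
+///     Default::default(),
+///     Default::default(),
+///     ClientId::from_count(1),
+/// )));
+///
+/// // With no registered client, the request is a no-op and returns Ok(()).
+/// handle_client_message_request_node_identity_snapshot(
+///     ClientId::from_count(2),
+///     data_state,
+///     client_thread_state,
+/// )
+/// .await
+/// .unwrap();
+/// # });
+/// ```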
-pub fn handle_client_message_request_node_identity_snapshot(
+pub async fn handle_client_message_request_node_identity_snapshot(
     client_id: ClientId,
     data_state: Arc<RwLock<DataState>>,
     client_thread_state: Arc<RwLock<ClientThreadState>>,
-) -> Result<(), ()> {
+) -> Result<(), HandleRequestNodeIdentitySnapshotError> {
     // Let's send the current node identity snapshot to the client
-    let client_thread_state_read_lock_guard = match client_thread_state.read() {
-        Ok(lock) => lock,
-        Err(_) => return Err(()),
-    };
-
-    let data_state_read_lock_guard = match data_state.read() {
-        Ok(lock) => lock,
-        Err(_) => {
-            drop(client_thread_state_read_lock_guard);
-            return Err(());
-        }
-    };
-
+    let (client_thread_state_read_lock_guard, data_state_read_lock_guard) =
+        futures::join!(client_thread_state.read(), data_state.read());
     let client_result = client_thread_state_read_lock_guard.clients.get(&client_id);
-    drop(data_state_read_lock_guard);

     if let Some(client) = client_result {
-        if let Err(_) = client.send_message(ServerMessage::NodeIdentitySnapshot) {
+        let mut sender = client.sender.clone();
+
+        // Let's copy the current node identity snapshot and send them
+        let nodes = data_state_read_lock_guard
+            .node_identity()
+            .map(|(_, node)| node.clone())
+            .collect::<Vec<_>>();
+
+        if let Err(err) = sender
+            .send(ServerMessage::NodeIdentitySnapshot(Arc::new(nodes)))
+            .await
+        {
             drop(client_thread_state_read_lock_guard);
-            drop_client_no_lock_guard(&client_id, client_thread_state.clone());
-            return Ok(());
+            drop_client_no_lock_guard(&client_id, client_thread_state.clone()).await;
+            return Err(HandleRequestNodeIdentitySnapshotError::ClientSendError(err));
         }

-        drop_client_no_lock_guard(&client_id, client_thread_state.clone());
         return Ok(());
     }

-    drop(client_thread_state_read_lock_guard);
-    return Ok(());
+    Ok(())
 }

+/// [HandleRequestHistogramSnapshotError] represents the scope of errors that
+/// can be returned from the [handle_client_message_request_histogram_snapshot]
+/// function.
+#[derive(Debug)]
+pub enum HandleRequestHistogramSnapshotError {
+    ClientSendError(SendError),
+}
+
+impl std::fmt::Display for HandleRequestHistogramSnapshotError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            HandleRequestHistogramSnapshotError::ClientSendError(err) => {
+                write!(
+                    f,
+                    "handle request histogram snapshot error: client send error: {}",
+                    err
+                )
+            }
+        }
+    }
+}
+
+impl std::error::Error for HandleRequestHistogramSnapshotError {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        match self {
+            HandleRequestHistogramSnapshotError::ClientSendError(err) => Some(err),
+        }
+    }
+}
+
 /// [handle_client_message_request_histogram_snapshot] is a function that
 /// processes the client message request for a histogram snapshot.
-pub fn handle_client_message_request_histogram_snapshot( +pub async fn handle_client_message_request_histogram_snapshot( client_id: ClientId, data_state: Arc>, client_thread_state: Arc>, -) -> Result<(), ()> { +) -> Result<(), HandleRequestHistogramSnapshotError> { // Let's send the current histogram data snapshot to the client - let client_thread_state_read_lock_guard = match client_thread_state.read() { - Ok(lock) => lock, - Err(_) => return Err(()), - }; - - let data_state_read_lock_guard = match data_state.read() { - Ok(lock) => lock, - Err(_) => { - drop(client_thread_state_read_lock_guard); - return Err(()); - } - }; + let (client_thread_state_read_lock_guard, data_state_read_lock_guard) = + futures::join!(client_thread_state.read(), data_state.read()); let histogram_data = ExplorerHistograms { block_size: data_state_read_lock_guard - .latest_blocks - .iter() + .latest_blocks() .skip(1) .map(|block| block.size) .collect(), block_time: data_state_read_lock_guard - .latest_blocks - .iter() + .latest_blocks() .skip(1) - .zip(data_state_read_lock_guard.latest_blocks.iter()) + .zip(data_state_read_lock_guard.latest_blocks()) .map(|(block_i, block_i_sub_1)| { (block_i.time.0 - block_i_sub_1.time.0).whole_seconds() as u64 }) .collect(), block_transactions: data_state_read_lock_guard - .latest_blocks - .iter() + .latest_blocks() .skip(1) .map(|block| block.num_transactions) .collect(), block_heights: data_state_read_lock_guard - .latest_blocks - .iter() + .latest_blocks() .skip(1) .map(|block| block.height) .collect(), }; + let arc_histogram_data = Arc::new(histogram_data); drop(data_state_read_lock_guard); if let Some(client) = client_thread_state_read_lock_guard.clients.get(&client_id) { - if let Err(_) = client.send_message(ServerMessage::HistogramSnapshot(histogram_data)) { - drop(client_thread_state_read_lock_guard); - drop_client_no_lock_guard(&client_id, client_thread_state.clone()); - return Ok(()); + let mut sender = client.sender.clone(); + drop(client_thread_state_read_lock_guard); + + if let Err(err) = sender + .send(ServerMessage::HistogramSnapshot(arc_histogram_data)) + .await + { + drop_client_no_lock_guard(&client_id, client_thread_state.clone()).await; + return Err(HandleRequestHistogramSnapshotError::ClientSendError(err)); } - drop_client_no_lock_guard(&client_id, client_thread_state.clone()); return Ok(()); } - drop(client_thread_state_read_lock_guard); Ok(()) } +#[derive(Debug)] +pub enum HandleRequestVotersSnapshotError { + ClientSendError(SendError), +} + +impl std::fmt::Display for HandleRequestVotersSnapshotError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + HandleRequestVotersSnapshotError::ClientSendError(err) => { + write!( + f, + "handle request voters snapshot error: client send error: {}", + err + ) + } + } + } +} + +impl std::error::Error for HandleRequestVotersSnapshotError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + HandleRequestVotersSnapshotError::ClientSendError(err) => Some(err), + } + } +} + +/// [handle_client_message_request_voters_snapshot] is a function that processes +/// the client message request for a voters snapshot. 
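+///
+/// A minimal calling sketch, analogous to the node identity example above
+/// (`no_run`; module paths and the `Default` impl for `DataState` are
+/// assumptions):
+///
+/// ```rust,no_run
+/// # use std::sync::Arc;
+/// # use async_std::sync::RwLock;
+/// # use node_metrics::service::client_id::ClientId;
+/// # use node_metrics::service::client_state::{
+/// #     handle_client_message_request_voters_snapshot, ClientThreadState,
+/// # };
+/// # use node_metrics::service::data_state::DataState;
+/// # async_std::task::block_on(async {
+/// let data_state = Arc::new(RwLock::new(DataState::default()));
+/// let client_thread_state = Arc::new(RwLock::new(ClientThreadState::new(
+///     Default::default(),
+///     Default::default(),
+///     Default::default(),
+///     Default::default(),
+///     ClientId::from_count(1),
+/// )));
+///
+/// // An unknown client id is a no-op and the handler returns Ok(()).
+/// handle_client_message_request_voters_snapshot(
+///     ClientId::from_count(2),
+///     data_state,
+///     client_thread_state,
+/// )
+/// .await
+/// .unwrap();
+/// # });
+/// ```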
+pub async fn handle_client_message_request_voters_snapshot( + client_id: ClientId, + data_state: Arc>, + client_thread_state: Arc>, +) -> Result<(), HandleRequestVotersSnapshotError> { + let (client_thread_state_read_lock_guard, data_state_read_lock_guard) = + futures::join!(client_thread_state.read(), data_state.read()); + + let voters_data = data_state_read_lock_guard + .latest_voters() + .cloned() + .collect::>(); + + let voters_data = Arc::new(voters_data); + + if let Some(client) = client_thread_state_read_lock_guard.clients.get(&client_id) { + let mut sender = client.sender.clone(); + drop(client_thread_state_read_lock_guard); + + if let Err(err) = sender + .send(ServerMessage::VotersSnapshot(voters_data.clone())) + .await + { + drop_client_no_lock_guard(&client_id, client_thread_state.clone()).await; + return Err(HandleRequestVotersSnapshotError::ClientSendError(err)); + } + + return Ok(()); + } + Ok(()) +} + +/// [ProcessClientMessageError] represents the scope of errors that can be +/// returned from the [process_client_message] function. +#[derive(Debug)] +pub enum ProcessClientMessageError { + Connected(HandleConnectedError), + BlocksSnapshot(HandleRequestBlocksSnapshotsError), + NodeIdentitySnapshot(HandleRequestNodeIdentitySnapshotError), + HistogramSnapshot(HandleRequestHistogramSnapshotError), + VotersSnapshot(HandleRequestVotersSnapshotError), +} + +impl From for ProcessClientMessageError { + fn from(err: HandleConnectedError) -> Self { + ProcessClientMessageError::Connected(err) + } +} + +impl From for ProcessClientMessageError { + fn from(err: HandleRequestBlocksSnapshotsError) -> Self { + ProcessClientMessageError::BlocksSnapshot(err) + } +} + +impl From for ProcessClientMessageError { + fn from(err: HandleRequestNodeIdentitySnapshotError) -> Self { + ProcessClientMessageError::NodeIdentitySnapshot(err) + } +} + +impl From for ProcessClientMessageError { + fn from(err: HandleRequestHistogramSnapshotError) -> Self { + ProcessClientMessageError::HistogramSnapshot(err) + } +} + +impl From for ProcessClientMessageError { + fn from(err: HandleRequestVotersSnapshotError) -> Self { + ProcessClientMessageError::VotersSnapshot(err) + } +} + +impl std::fmt::Display for ProcessClientMessageError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ProcessClientMessageError::Connected(err) => { + write!(f, "process client message error: connected: {}", err) + } + ProcessClientMessageError::BlocksSnapshot(err) => { + write!(f, "process client message error: blocks snapshot: {}", err) + } + ProcessClientMessageError::NodeIdentitySnapshot(err) => { + write!( + f, + "process client message error: node identity snapshot: {}", + err + ) + } + ProcessClientMessageError::HistogramSnapshot(err) => { + write!( + f, + "process client message error: histogram snapshot: {}", + err + ) + } + ProcessClientMessageError::VotersSnapshot(err) => { + write!(f, "process client message error: voters snapshot: {}", err) + } + } + } +} + +impl std::error::Error for ProcessClientMessageError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + ProcessClientMessageError::Connected(err) => Some(err), + ProcessClientMessageError::BlocksSnapshot(err) => Some(err), + ProcessClientMessageError::NodeIdentitySnapshot(err) => Some(err), + ProcessClientMessageError::HistogramSnapshot(err) => Some(err), + ProcessClientMessageError::VotersSnapshot(err) => Some(err), + } + } +} + /// [process_client_message] is a function that processes the client 
message
 /// and dispatches it to the appropriate handler.
 ///
@@ -328,26 +589,35 @@ pub fn handle_client_message_request_histogram_snapshot(
 /// The [ClientThreadState] is provided as it needs to be updated with new
 /// subscriptions / new connections depending on the incoming
 /// [InternalClientMessage]
-pub fn process_client_message(
+pub async fn process_client_message(
     message: InternalClientMessage,
     data_state: Arc<RwLock<DataState>>,
     client_thread_state: Arc<RwLock<ClientThreadState>>,
-) -> Result<(), ()> {
+) -> Result<(), ProcessClientMessageError> {
     match message {
         InternalClientMessage::Connected(sender) => {
-            handle_client_message_connected(sender, client_thread_state).map(|_| ())
+            handle_client_message_connected(sender, client_thread_state).await?;
+            Ok(())
         }

         InternalClientMessage::Disconnected(client_id) => {
-            handle_client_message_disconnected(client_id, client_thread_state)
+            handle_client_message_disconnected(client_id, client_thread_state).await;
+            Ok(())
        }

         InternalClientMessage::SubscribeLatestBlock(client_id) => {
-            handle_client_message_subscribe_latest_block(client_id, client_thread_state)
+            handle_client_message_subscribe_latest_block(client_id, client_thread_state).await;
+            Ok(())
         }

         InternalClientMessage::SubscribeNodeIdentity(client_id) => {
-            handle_client_message_subscribe_node_identity(client_id, client_thread_state)
+            handle_client_message_subscribe_node_identity(client_id, client_thread_state).await;
+            Ok(())
+        }
+
+        InternalClientMessage::SubscribeVoters(client_id) => {
+            handle_client_message_subscribe_voters(client_id, client_thread_state).await;
+            Ok(())
         }

         InternalClientMessage::RequestBlocksSnapshot(client_id) => {
@@ -356,6 +626,8 @@ pub fn process_client_message(
                 data_state,
                 client_thread_state,
             )
+            .await?;
+            Ok(())
         }

         InternalClientMessage::RequestNodeIdentitySnapshot(client_id) => {
@@ -364,6 +636,8 @@ pub fn process_client_message(
                 data_state,
                 client_thread_state,
             )
+            .await?;
+            Ok(())
         }

         InternalClientMessage::RequestHistogramSnapshot(client_id) => {
@@ -372,36 +646,64 @@ pub fn process_client_message(
                 data_state,
                 client_thread_state,
             )
+            .await?;
+            Ok(())
+        }
+
+        InternalClientMessage::RequestVotersSnapshot(client_id) => {
+            handle_client_message_request_voters_snapshot(
+                client_id,
+                data_state,
+                client_thread_state,
+            )
+            .await?;
+            Ok(())
         }
     }
 }

 /// [clone_block_detail] is a utility function that clones a [BlockDetail]
 /// instance.
-fn clone_block_detail(input: &BlockDetail<SeqTypes>) -> BlockDetail<SeqTypes> {
+pub fn clone_block_detail(input: &BlockDetail<SeqTypes>) -> BlockDetail<SeqTypes> {
     BlockDetail {
-        hash: input.hash.clone(),
-        proposer_id: input.proposer_id.clone(),
+        hash: input.hash,
+        proposer_id: input.proposer_id,
         height: input.height,
         size: input.size,
         time: input.time,
         num_transactions: input.num_transactions,
-        fee_recipient: input.fee_recipient.clone(),
+        fee_recipient: input.fee_recipient,
         block_reward: input.block_reward.clone(),
     }
 }

+/// [drop_failed_client_sends] is a function that will drop all of the failed
+/// client sends from the client thread state.
+async fn drop_failed_client_sends(
+    client_thread_state: Arc<RwLock<ClientThreadState>>,
+    failed_client_sends: Vec<ClientId>,
+) {
+    // Let's acquire our write lock
+    let mut client_thread_state_write_lock_guard = client_thread_state.write().await;
+
+    // We want to drop all of the failed clients.
+ // There's an optimization to be had here + for client_id in failed_client_sends { + drop_client_client_thread_state_write_guard( + &client_id, + &mut client_thread_state_write_lock_guard, + ); + } +} + /// [handle_received_block_detail] is a function that processes received Block /// details and will attempt to distribute the message to all of the clients /// that are subscribed to the latest block stream. -fn handle_received_block_detail( +async fn handle_received_block_detail( client_thread_state: Arc>, block_detail: BlockDetail, -) -> Result<(), ()> { - let client_thread_state_read_lock_guard = match client_thread_state.read() { - Ok(lock) => lock, - Err(_) => return Err(()), - }; +) { + let client_thread_state_read_lock_guard = client_thread_state.read().await; // These are the clients who are subscribed to the latest blocks, that // have an active ClientState within the system. @@ -411,181 +713,1154 @@ fn handle_received_block_detail( .map(|client_id| { ( client_id, - (&client_thread_state_read_lock_guard) - .clients - .get(client_id), + client_thread_state_read_lock_guard.clients.get(client_id), ) }) .filter(|(_, client)| client.is_some()); + let arc_block_detail = Arc::new(block_detail); // We collect the results of sending the latest block to the clients. - let client_send_results = latest_block_subscribers.map(|(client_id, client)| { - // This is guaranteed to be a some now - let client = client.unwrap(); - let send_result = client.send_message(ServerMessage::LatestBlock(clone_block_detail( - &block_detail, - ))); - (client_id, send_result) + let client_send_result_future = latest_block_subscribers.map(|(client_id, client)| { + let arc_block_detail = arc_block_detail.clone(); + async move { + // This is guaranteed to be a some now + let client = client.unwrap(); + let mut sender = client.sender.clone(); + let send_result = sender + .send(ServerMessage::LatestBlock(arc_block_detail)) + .await; + + (client_id, send_result) + } }); + let client_send_results = futures::future::join_all(client_send_result_future).await; + // These are the clients we failed to send the message to. We copy these // here so we can drop our read lock. let failed_client_sends = client_send_results + .into_iter() .filter(|(_, send_result)| send_result.is_err()) - .map(|(client_id, _)| client_id.clone()) + .map(|(client_id, _)| *client_id) .collect::>(); // Explicitly Drop the read lock. drop(client_thread_state_read_lock_guard); if failed_client_sends.is_empty() { - return Ok(()); + return; } - // Let's acquire our write lock - let mut client_thread_state_write_lock_guard = match client_thread_state.write() { - Ok(lock) => lock, - Err(_) => return Err(()), - }; - - // We want to drop all of the failed clients. - // There's an optimization to be had here - for client_id in failed_client_sends { - drop_client_client_thread_state_write_guard( - &client_id, - &mut client_thread_state_write_lock_guard, - ); - } - - drop(client_thread_state_write_lock_guard); - - Ok(()) + drop_failed_client_sends(client_thread_state, failed_client_sends).await; } -/// [process_client_handling_thread] is a function that processes the client -/// handling thread. This thread is responsible for managing the state of the -/// connected clients, and their subscriptions. 
-pub fn process_client_handling_thread(
-    receiver: Receiver<InternalClientMessage>,
-    data_state: Arc<RwLock<DataState>>,
+/// [handle_received_node_identity] is a function that processes received
+/// NodeIdentity and will attempt to distribute the message to all of the
+/// clients that are subscribed to the node identity stream.
+async fn handle_received_node_identity(
     client_thread_state: Arc<RwLock<ClientThreadState>>,
+    node_identity: NodeIdentity,
 ) {
-    while let Ok(message) = receiver.recv() {
-        if let Err(_) =
-            process_client_message(message, data_state.clone(), client_thread_state.clone())
-        {
-            break;
-        }
-    }
-}
+    let client_thread_state_read_lock_guard = client_thread_state.read().await;

-/// [process_client_handling_stream] is a function that processes the client
-/// handling stream. This stream is responsible for managing the state of the
-/// connected clients, and their subscriptions.
-pub async fn process_client_handling_stream<S>(
-    mut stream: S,
-    data_state: Arc<RwLock<DataState>>,
-    client_thread_state: Arc<RwLock<ClientThreadState>>,
-) where
-    S: Stream<Item = InternalClientMessage> + Unpin,
-{
-    while let Some(message) = stream.next().await {
-        if let Err(_) =
-            process_client_message(message, data_state.clone(), client_thread_state.clone())
-        {
-            break;
+    // These are the clients who are subscribed to the node identities, that
+    // have an active ClientState within the system.
+    let node_identity_subscribers = client_thread_state_read_lock_guard
+        .subscribed_node_identity
+        .iter()
+        .map(|client_id| {
+            (
+                client_id,
+                client_thread_state_read_lock_guard.clients.get(client_id),
+            )
+        })
+        .filter(|(_, client)| client.is_some());
+
+    let arc_node_identity = Arc::new(node_identity);
+    // We collect the results of sending the latest node identity to the clients.
+    let client_send_result_future = node_identity_subscribers.map(|(client_id, client)| {
+        let arc_node_identity = arc_node_identity.clone();
+        async move {
+            // This is guaranteed to be a some now
+            let client = client.unwrap();
+            let mut sender = client.sender.clone();
+            let send_result = sender
+                .send(ServerMessage::LatestNodeIdentity(arc_node_identity.clone()))
+                .await;
+
+            (client_id, send_result)
         }
+    });
+
+    let client_send_results = futures::future::join_all(client_send_result_future).await;
+
+    // These are the clients we failed to send the message to. We copy these
+    // here so we can drop our read lock.
+    let failed_client_sends = client_send_results
+        .into_iter()
+        .filter(|(_, send_result)| send_result.is_err())
+        .map(|(client_id, _)| *client_id)
+        .collect::<Vec<_>>();
+
+    // Explicitly Drop the read lock.
+    drop(client_thread_state_read_lock_guard);
+
+    if failed_client_sends.is_empty() {
+        return;
     }
+
+    drop_failed_client_sends(client_thread_state, failed_client_sends).await;
 }

-/// [process_distribute_client_handling_thread] is a function that processes the
-/// the [Receiver] of incoming [BlockDetail] and distributes them to all
-/// subscribed clients.
-pub fn process_distribute_client_handling_thread(
+/// [handle_received_voters] is a function that processes received voters and
+/// will attempt to distribute the message to all of the clients that are
+/// subscribed to the voters stream.
+async fn handle_received_voters(
     client_thread_state: Arc<RwLock<ClientThreadState>>,
+    voters: BitVec,
+) {
+    let client_thread_state_read_lock_guard = client_thread_state.read().await;
+
+    // These are the clients who are subscribed to the voters, that
+    // have an active ClientState within the system.
+    let voters_subscribers = client_thread_state_read_lock_guard
+        .subscribed_voters
+        .iter()
+        .map(|client_id| {
+            (
+                client_id,
+                client_thread_state_read_lock_guard.clients.get(client_id),
+            )
+        })
+        .filter(|(_, client)| client.is_some());
+
+    // We collect the results of sending the latest voters to the clients.
+    let client_send_result_future = voters_subscribers.map(|(client_id, client)| {
+        let voters = voters.clone();
+        async move {
+            // This is guaranteed to be a some now
+            let client = client.unwrap();
+            let mut sender = client.sender.clone();
+            let send_result = sender.send(ServerMessage::LatestVoters(voters)).await;
+
+            (client_id, send_result)
+        }
+    });
+
+    let client_send_results = futures::future::join_all(client_send_result_future).await;
+
+    // These are the clients we failed to send the message to. We copy these
+    // here so we can drop our read lock.
+    let failed_client_sends = client_send_results
+        .into_iter()
+        .filter(|(_, send_result)| send_result.is_err())
+        .map(|(client_id, _)| *client_id)
+        .collect::<Vec<_>>();
+
+    // Explicitly Drop the read lock.
+    drop(client_thread_state_read_lock_guard);
+
+    if failed_client_sends.is_empty() {
+        return;
+    }
+
+    drop_failed_client_sends(client_thread_state, failed_client_sends).await;
+}
+
+/// [process_internal_client_message_stream] is a function that processes the
+/// client handling stream. This stream is responsible for managing the state
+/// of the connected clients, and their subscriptions.
+pub async fn process_internal_client_message_stream<S>(
+    mut stream: S,
+    data_state: Arc<RwLock<DataState>>,
+    client_thread_state: Arc<RwLock<ClientThreadState>>,
+) where
+    S: Stream<Item = InternalClientMessage> + Unpin,
+{
+    loop {
+        let message_result = stream.next().await;
+        let message = if let Some(message) = message_result {
+            message
+        } else {
+            tracing::info!("internal client message handler closed.");
+            return;
+        };
+
+        if let Err(err) =
+            process_client_message(message, data_state.clone(), client_thread_state.clone()).await
+        {
+            tracing::info!(
+                "internal client message processing encountered an error: {}",
+                err,
+            );
+            return;
+        }
+    }
+}
+
+/// [process_distribute_block_detail_handling_stream] is a function that
+/// processes the [Stream] of incoming [BlockDetail] and distributes them
+/// to all subscribed clients.
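+///
+/// A minimal wiring sketch (`no_run`; the module paths are assumptions, and
+/// the channel/task setup simply mirrors the tests at the bottom of this
+/// module):
+///
+/// ```rust,no_run
+/// # use std::sync::Arc;
+/// # use async_std::sync::RwLock;
+/// # use futures::channel::mpsc;
+/// # use node_metrics::service::client_id::ClientId;
+/// # use node_metrics::service::client_state::{
+/// #     process_distribute_block_detail_handling_stream, ClientThreadState,
+/// # };
+/// let client_thread_state = Arc::new(RwLock::new(ClientThreadState::new(
+///     Default::default(),
+///     Default::default(),
+///     Default::default(),
+///     Default::default(),
+///     ClientId::from_count(1),
+/// )));
+///
+/// // The item type of the channel is inferred from the stream bound.
+/// let (block_detail_sender, block_detail_receiver) = mpsc::channel(1);
+///
+/// // The task runs until the sending side of the channel is dropped.
+/// let handle = async_std::task::spawn(process_distribute_block_detail_handling_stream(
+///     client_thread_state,
+///     block_detail_receiver,
+/// ));
+/// # drop(block_detail_sender);
+/// # drop(handle);
+/// ```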
+pub async fn process_distribute_block_detail_handling_stream<S>(
+    client_thread_state: Arc<RwLock<ClientThreadState>>,
+    mut stream: S,
+) where
+    S: Stream<Item = BlockDetail<SeqTypes>> + Unpin,
+{
+    loop {
+        let block_detail_result = stream.next().await;
+
+        let block_detail = if let Some(block_detail) = block_detail_result {
+            block_detail
+        } else {
+            tracing::info!("block detail stream closed. shutting down client handling stream.");
+            return;
+        };
+
+        handle_received_block_detail(client_thread_state.clone(), block_detail).await
+    }
+}
+
+/// [process_distribute_node_identity_handling_stream] is a function that
+/// processes the [Stream] of incoming [NodeIdentity] and distributes them
+/// to all subscribed clients.
+pub async fn process_distribute_node_identity_handling_stream<S>(
+    client_thread_state: Arc<RwLock<ClientThreadState>>,
+    mut stream: S,
+) where
+    S: Stream<Item = NodeIdentity> + Unpin,
+{
+    loop {
+        let node_identity_result = stream.next().await;
+
+        let node_identity = if let Some(node_identity) = node_identity_result {
+            node_identity
+        } else {
+            tracing::info!("node identity stream closed. shutting down client handling stream.");
+            return;
+        };
+
+        handle_received_node_identity(client_thread_state.clone(), node_identity).await
+    }
+}
+
+/// [process_distribute_voters_handling_stream] is a function that processes
+/// the [Stream] of incoming [BitVec] and distributes them to all
+/// subscribed clients.
+pub async fn process_distribute_voters_handling_stream<S>(
+    client_thread_state: Arc<RwLock<ClientThreadState>>,
+    mut stream: S,
+) where
+    S: Stream<Item = BitVec> + Unpin,
+{
+    loop {
+        let voters_result = stream.next().await;
+
+        let voters = if let Some(voters) = voters_result {
+            voters
+        } else {
+            tracing::info!("voters stream closed. shutting down client handling stream.");
+            return;
+        };
+
+        handle_received_voters(client_thread_state.clone(), voters).await
+    }
 }

 #[cfg(test)]
 pub mod tests {
     use super::{process_internal_client_message_stream, ClientThreadState};
     use crate::service::{
         client_id::ClientId,
         client_message::InternalClientMessage,
         client_state::{
             process_distribute_block_detail_handling_stream,
             process_distribute_node_identity_handling_stream,
             process_distribute_voters_handling_stream,
         },
         data_state::{
             create_block_detail_from_leaf, process_leaf_stream, DataState, LocationDetails,
             NodeIdentity,
         },
         server_message::ServerMessage,
     };
     use async_std::sync::RwLock;
     use bitvec::vec::BitVec;
     use futures::{channel::mpsc, FutureExt, SinkExt, StreamExt};
     use hotshot_types::{signature_key::BLSPubKey, traits::signature_key::SignatureKey};
     use sequencer::{Leaf, NodeState, ValidatedState};
     use std::{
         net::{IpAddr, Ipv4Addr},
         sync::Arc,
         time::Duration,
     };

     pub fn create_test_client_thread_state() -> ClientThreadState {
         ClientThreadState {
             clients: Default::default(),
             subscribed_latest_block: Default::default(),
             subscribed_node_identity: Default::default(),
             subscribed_voters: Default::default(),
             connection_id_counter: ClientId::from_count(1),
         }
     }

     pub fn create_test_data_state() ->
(NodeIdentity, NodeIdentity, NodeIdentity, DataState) { + let node_1 = { + let (pub_key, _) = BLSPubKey::generated_from_seed_indexed([0; 32], 0); + NodeIdentity::new( + pub_key, + "a".to_string(), + Default::default(), + vec![IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))], + "company".to_string(), + Some(LocationDetails::new((0.0, 0.0), "US".to_string())), + "Windows 11".to_string(), + "espresso".to_string(), + "residential".to_string(), + ) + }; - let mut subscribed_node_identity = HashSet::with_capacity(1); - let mut subscribed_latest_block = HashSet::with_capacity(1); + let node_2 = { + let (pub_key, _) = BLSPubKey::generated_from_seed_indexed([0; 32], 1); + NodeIdentity::new( + pub_key, + "b".to_string(), + Default::default(), + vec![IpAddr::V4(Ipv4Addr::new(127, 0, 0, 2))], + "company".to_string(), + Some(LocationDetails::new((0.0, 0.0), "US".to_string())), + "Windows 11".to_string(), + "espresso".to_string(), + "residential".to_string(), + ) + }; - let client_thread_state = ClientThreadState { - clients, - subscribed_latest_block, - subscribed_node_identity, - connection_id_counter: 1, + let node_3 = { + let (pub_key, _) = BLSPubKey::generated_from_seed_indexed([0; 32], 2); + NodeIdentity::new( + pub_key, + "b".to_string(), + Default::default(), + vec![IpAddr::V4(Ipv4Addr::new(127, 0, 0, 3))], + "company".to_string(), + Some(LocationDetails::new((0.0, 0.0), "US".to_string())), + "Windows 11".to_string(), + "espresso".to_string(), + "residential".to_string(), + ) }; - let client_thread_state = Arc::new(RwLock::new(client_thread_state)); - let data_state = Arc::new(RwLock::new(DataState { - latest_voters: CircularBuffer::new(), - latest_blocks: CircularBuffer::new(), - stake_table: Default::default(), - node_identity: vec![], - })); + let mut data_state: DataState = Default::default(); + data_state.add_node_identity(node_1.clone()); + data_state.add_node_identity(node_2.clone()); + data_state.add_node_identity(node_3.clone()); + + (node_1, node_2, node_3, data_state) + } + + #[async_std::test] + async fn test_client_handling_stream_task_shutdown() { + let (_, _, _, data_state) = create_test_data_state(); + let client_thread_state = Arc::new(RwLock::new(create_test_client_thread_state())); + let data_state = Arc::new(RwLock::new(data_state)); - thread::spawn(move || { - process_client_handling_thread( + let (mut internal_client_message_sender, internal_client_message_receiver) = + mpsc::channel(1); + let process_client_handling_stream_handle: async_std::task::JoinHandle<()> = + async_std::task::spawn(process_internal_client_message_stream( + internal_client_message_receiver, + data_state, + client_thread_state, + )); + + // disconnect the last internal client message sender + internal_client_message_sender.disconnect(); + + // Join the async task. 
+ if let Err(timeout_error) = async_std::io::timeout( + Duration::from_millis(200), + process_client_handling_stream_handle.map(Ok), + ) + .await + { + panic!( + "process_client_handling_stream_handle did not complete in time, error: {}", + timeout_error + ); + } + } + + #[async_std::test] + async fn test_process_client_handling_stream_request_latest_voters_snapshot() { + let (_, _, _, mut data_state) = create_test_data_state(); + let client_thread_state = Arc::new(RwLock::new(create_test_client_thread_state())); + let voters_1 = BitVec::from_vec(vec![0x55]); + let voters_2 = BitVec::from_vec(vec![0xAA]); + data_state.add_latest_voters(voters_1.clone()); + data_state.add_latest_voters(voters_2.clone()); + + let data_state = Arc::new(RwLock::new(data_state)); + + let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); + let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); + let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); + let process_client_handling_stream_handle = + async_std::task::spawn(process_internal_client_message_stream( + internal_client_message_receiver, + data_state, + client_thread_state, + )); + + // Send a Connected Message to the server + let mut internal_client_message_sender_1 = internal_client_message_sender.clone(); + assert_eq!( + internal_client_message_sender_1 + .send(InternalClientMessage::Connected(server_message_sender_1)) + .await, + Ok(()) + ); + + assert_eq!( + server_message_receiver_1.next().await, + Some(ServerMessage::YouAre(ClientId::from_count(2))), + ); + + let client_1_id = ClientId::from_count(2); + + let mut internal_client_message_sender_2 = internal_client_message_sender; + assert_eq!( + internal_client_message_sender_2 + .send(InternalClientMessage::Connected(server_message_sender_2)) + .await, + Ok(()) + ); + + assert_eq!( + server_message_receiver_2.next().await, + Some(ServerMessage::YouAre(ClientId::from_count(3))), + ); + + assert_eq!( + internal_client_message_sender_1 + .send(InternalClientMessage::RequestVotersSnapshot(client_1_id)) + .await, + Ok(()), + ); + + assert_eq!( + server_message_receiver_1.next().await, + Some(ServerMessage::VotersSnapshot(Arc::new(vec![ + voters_1, voters_2 + ]))), + ); + + // disconnect the internal client message sender + internal_client_message_sender_1.disconnect(); + internal_client_message_sender_2.disconnect(); + + // The server message receiver should be shutdown, and should return + // nothing further + assert_eq!(server_message_receiver_1.next().await, None); + assert_eq!(server_message_receiver_2.next().await, None); + + if let Err(timeout_error) = async_std::io::timeout( + Duration::from_millis(200), + process_client_handling_stream_handle.map(Ok), + ) + .await + { + panic!( + "process_client_handling_stream_handle did not complete in time, error: {}", + timeout_error + ); + } + } + + #[async_std::test] + #[cfg(feature = "testing")] + async fn test_process_client_handling_stream_request_latest_blocks_snapshot() { + use super::clone_block_detail; + use crate::service::data_state::create_block_detail_from_leaf; + + let (_, _, _, mut data_state) = create_test_data_state(); + let client_thread_state = Arc::new(RwLock::new(create_test_client_thread_state())); + let leaf_1 = Leaf::genesis(&ValidatedState::default(), &NodeState::mock()).await; + let block_1 = create_block_detail_from_leaf(&leaf_1); + data_state.add_latest_block(clone_block_detail(&block_1)); + + let data_state = Arc::new(RwLock::new(data_state)); 
+ + let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); + let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); + let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); + let process_client_handling_stream_handle = + async_std::task::spawn(process_internal_client_message_stream( + internal_client_message_receiver, + data_state, + client_thread_state, + )); + + // Send a Connected Message to the server + let mut internal_client_message_sender_1 = internal_client_message_sender.clone(); + assert_eq!( + internal_client_message_sender_1 + .send(InternalClientMessage::Connected(server_message_sender_1)) + .await, + Ok(()) + ); + + assert_eq!( + server_message_receiver_1.next().await, + Some(ServerMessage::YouAre(ClientId::from_count(2))), + ); + + let client_1_id = ClientId::from_count(2); + + let mut internal_client_message_sender_2 = internal_client_message_sender; + assert_eq!( + internal_client_message_sender_2 + .send(InternalClientMessage::Connected(server_message_sender_2)) + .await, + Ok(()), + ); + + assert_eq!( + server_message_receiver_2.next().await, + Some(ServerMessage::YouAre(ClientId::from_count(3))), + ); + + assert_eq!( + internal_client_message_sender_1 + .send(InternalClientMessage::RequestBlocksSnapshot(client_1_id)) + .await, + Ok(()), + ); + + assert_eq!( + server_message_receiver_1.next().await, + Some(ServerMessage::BlocksSnapshot(Arc::new(vec![block_1]))), + ); + + // disconnect the internal client message sender + internal_client_message_sender_1.disconnect(); + internal_client_message_sender_2.disconnect(); + + // The server message receiver should be shutdown, and should return + // nothing further + assert_eq!(server_message_receiver_1.next().await, None); + assert_eq!(server_message_receiver_2.next().await, None); + + // Join the async task. 
+ if let Err(timeout_error) = async_std::io::timeout( + Duration::from_millis(200), + process_client_handling_stream_handle.map(Ok), + ) + .await + { + panic!( + "process_client_handling_stream_handle did not complete in time, error: {}", + timeout_error + ); + } + } + + #[async_std::test] + async fn test_process_client_handling_stream_request_node_identity_snapshot() { + let (node_1, node_2, node_3, data_state) = create_test_data_state(); + let client_thread_state = Arc::new(RwLock::new(create_test_client_thread_state())); + let data_state = Arc::new(RwLock::new(data_state)); + + let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); + let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); + let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); + let process_client_handling_stream_handle: async_std::task::JoinHandle<()> = + async_std::task::spawn(process_internal_client_message_stream( + internal_client_message_receiver, + data_state, + client_thread_state, + )); + + // Send a Connected Message to the server + let mut internal_client_message_sender_1 = internal_client_message_sender.clone(); + assert_eq!( + internal_client_message_sender_1 + .send(InternalClientMessage::Connected(server_message_sender_1)) + .await, + Ok(()) + ); + + assert_eq!( + server_message_receiver_1.next().await, + Some(ServerMessage::YouAre(ClientId::from_count(2))), + ); + + let client_1_id = ClientId::from_count(2); + + // Send another Connected Message to the server + let mut internal_client_message_sender_2 = internal_client_message_sender; + assert_eq!( + internal_client_message_sender_2 + .send(InternalClientMessage::Connected(server_message_sender_2)) + .await, + Ok(()) + ); + + assert_eq!( + server_message_receiver_2.next().await, + Some(ServerMessage::YouAre(ClientId::from_count(3))), + ); + + assert_eq!( + internal_client_message_sender_1 + .send(InternalClientMessage::RequestNodeIdentitySnapshot( + client_1_id + )) + .await, + Ok(()), + ); + + assert_eq!( + server_message_receiver_1.next().await, + Some(ServerMessage::NodeIdentitySnapshot(Arc::new(vec![ + node_1.clone(), + node_2.clone(), + node_3.clone() + ]))), + ); + + // disconnect the last internal client message sender + internal_client_message_sender_1.disconnect(); + internal_client_message_sender_2.disconnect(); + + // The server message receiver should be shutdown, and should return + // nothing further + assert_eq!(server_message_receiver_1.next().await, None); + assert_eq!(server_message_receiver_2.next().await, None); + + // Join the async task. 
+ if let Err(timeout_error) = async_std::io::timeout( + Duration::from_millis(200), + process_client_handling_stream_handle.map(Ok), + ) + .await + { + panic!( + "process_client_handling_stream_handle did not complete in time, error: {}", + timeout_error + ); + } + } + + #[async_std::test] + async fn test_process_client_handling_stream_subscribe_latest_block() { + let (_, _, _, data_state) = create_test_data_state(); + let client_thread_state = Arc::new(RwLock::new(create_test_client_thread_state())); + let data_state = Arc::new(RwLock::new(data_state)); + + let (mut leaf_sender, leaf_receiver) = mpsc::channel(1); + let (block_detail_sender, block_detail_receiver) = mpsc::channel(1); + let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); + let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); + let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); + let (server_message_sender_3, mut server_message_receiver_3) = mpsc::channel(1); + let process_client_handling_stream_handle = + async_std::task::spawn(process_internal_client_message_stream( internal_client_message_receiver, data_state.clone(), client_thread_state.clone(), - ) - }); + )); + + let process_distribute_client_handling_handle = + async_std::task::spawn(process_distribute_block_detail_handling_stream( + client_thread_state, + block_detail_receiver, + )); + + let process_leaf_stream_handle = async_std::task::spawn(process_leaf_stream( + leaf_receiver, + data_state, + block_detail_sender, + )); + + // Send a Connected Message to the server + let mut internal_client_message_sender_1 = internal_client_message_sender.clone(); + assert_eq!( + internal_client_message_sender_1 + .send(InternalClientMessage::Connected(server_message_sender_1)) + .await, + Ok(()) + ); + + assert_eq!( + server_message_receiver_1.next().await, + Some(ServerMessage::YouAre(ClientId::from_count(2))), + ); + + let client_1_id = ClientId::from_count(2); + let client_2_id = ClientId::from_count(3); + + // Send another Connected Message to the server + let mut internal_client_message_sender_2 = internal_client_message_sender.clone(); + assert_eq!( + internal_client_message_sender_2 + .send(InternalClientMessage::Connected(server_message_sender_2)) + .await, + Ok(()) + ); + + assert_eq!( + server_message_receiver_2.next().await, + Some(ServerMessage::YouAre(ClientId::from_count(3))), + ); + + // Send another Connected Message to the server + let mut internal_client_message_sender_3 = internal_client_message_sender; + assert_eq!( + internal_client_message_sender_3 + .send(InternalClientMessage::Connected(server_message_sender_3)) + .await, + Ok(()) + ); + + assert_eq!( + server_message_receiver_3.next().await, + Some(ServerMessage::YouAre(ClientId::from_count(4))), + ); + + assert_eq!( + internal_client_message_sender_1 + .send(InternalClientMessage::SubscribeLatestBlock(client_1_id)) + .await, + Ok(()), + ); + + assert_eq!( + internal_client_message_sender_1 + .send(InternalClientMessage::SubscribeLatestBlock(client_2_id)) + .await, + Ok(()), + ); + + // No response expected from the client messages at the moment. 
+ + // send a new leaf + let leaf = Leaf::genesis(&ValidatedState::default(), &NodeState::mock()).await; + let expected_block = create_block_detail_from_leaf(&leaf); + let arc_expected_block = Arc::new(expected_block); + + assert_eq!(leaf_sender.send(leaf).await, Ok(())); + + // We should receive the Block Detail on each subscribed client + assert_eq!( + server_message_receiver_1.next().await, + Some(ServerMessage::LatestBlock(arc_expected_block.clone())) + ); + assert_eq!( + server_message_receiver_2.next().await, + Some(ServerMessage::LatestBlock(arc_expected_block.clone())) + ); + + // disconnect the leaf sender + leaf_sender.disconnect(); + + // Join the async task. + if let Err(timeout_error) = async_std::io::timeout( + Duration::from_millis(200), + process_leaf_stream_handle.map(Ok), + ) + .await + { + panic!( + "process_leaf_stream_handle did not complete in time, error: {}", + timeout_error + ); + } + + // Join the async task. + if let Err(timeout_error) = async_std::io::timeout( + Duration::from_millis(200), + process_distribute_client_handling_handle.map(Ok), + ) + .await + { + panic!( + "process_distribute_client_handling_handle did not complete in time, error: {}", + timeout_error + ); + } + + // disconnect the last internal client message sender + internal_client_message_sender_1.disconnect(); + internal_client_message_sender_2.disconnect(); + internal_client_message_sender_3.disconnect(); + + // The server message receiver should be shutdown, and should return + // nothing further + assert_eq!(server_message_receiver_1.next().await, None); + assert_eq!(server_message_receiver_2.next().await, None); + assert_eq!(server_message_receiver_3.next().await, None); + + // Join the async task. + if let Err(timeout_error) = async_std::io::timeout( + Duration::from_millis(200), + process_client_handling_stream_handle.map(Ok), + ) + .await + { + panic!( + "process_client_handling_stream_handle did not complete in time, error: {}", + timeout_error + ); + } + } + + #[async_std::test] + async fn test_process_client_handling_stream_subscribe_node_identity() { + let (node_1, _, _, data_state) = create_test_data_state(); + let client_thread_state = Arc::new(RwLock::new(create_test_client_thread_state())); + let data_state = Arc::new(RwLock::new(data_state)); + + let (mut node_identity_sender, node_identity_receiver) = mpsc::channel(1); + let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); + let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); + let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); + let (server_message_sender_3, mut server_message_receiver_3) = mpsc::channel(1); + let process_client_handling_stream_handle = + async_std::task::spawn(process_internal_client_message_stream( + internal_client_message_receiver, + data_state.clone(), + client_thread_state.clone(), + )); + + let process_distribute_client_handling_handle = + async_std::task::spawn(process_distribute_node_identity_handling_stream( + client_thread_state, + node_identity_receiver, + )); + + // Send a Connected Message to the server + let mut internal_client_message_sender_1 = internal_client_message_sender.clone(); + assert_eq!( + internal_client_message_sender_1 + .send(InternalClientMessage::Connected(server_message_sender_1)) + .await, + Ok(()) + ); + + assert_eq!( + server_message_receiver_1.next().await, + Some(ServerMessage::YouAre(ClientId::from_count(2))), + ); + + let client_1_id = ClientId::from_count(2); + let 
client_2_id = ClientId::from_count(3);
+
+        // Send another Connected Message to the server
+        let mut internal_client_message_sender_2 = internal_client_message_sender.clone();
+        assert_eq!(
+            internal_client_message_sender_2
+                .send(InternalClientMessage::Connected(server_message_sender_2))
+                .await,
+            Ok(())
+        );
+
+        assert_eq!(
+            server_message_receiver_2.next().await,
+            Some(ServerMessage::YouAre(ClientId::from_count(3))),
+        );
+
+        // Send another Connected Message to the server
+        let mut internal_client_message_sender_3 = internal_client_message_sender;
+        assert_eq!(
+            internal_client_message_sender_3
+                .send(InternalClientMessage::Connected(server_message_sender_3))
+                .await,
+            Ok(())
+        );
+
+        assert_eq!(
+            server_message_receiver_3.next().await,
+            Some(ServerMessage::YouAre(ClientId::from_count(4))),
+        );
+
+        assert_eq!(
+            internal_client_message_sender_1
+                .send(InternalClientMessage::SubscribeNodeIdentity(client_1_id))
+                .await,
+            Ok(()),
+        );
+
+        assert_eq!(
+            internal_client_message_sender_1
+                .send(InternalClientMessage::SubscribeNodeIdentity(client_2_id))
+                .await,
+            Ok(()),
+        );
+
+        // No response expected from the client messages at the moment.
+
+        // send a new Node Identity
+        let node_identity = node_1;
+        assert_eq!(
+            node_identity_sender.send(node_identity.clone()).await,
+            Ok(())
+        );
+
+        let arc_node_identity = Arc::new(node_identity.clone());
+
+        // We should receive the latest Node Identity on each subscribed client
+        assert_eq!(
+            server_message_receiver_1.next().await,
+            Some(ServerMessage::LatestNodeIdentity(arc_node_identity.clone()))
+        );
+        assert_eq!(
+            server_message_receiver_2.next().await,
+            Some(ServerMessage::LatestNodeIdentity(arc_node_identity.clone()))
+        );
+
+        // disconnect the node identity sender
+        node_identity_sender.disconnect();
+
+        // Join the async task.
+        if let Err(timeout_error) = async_std::io::timeout(
+            Duration::from_millis(200),
+            process_distribute_client_handling_handle.map(Ok),
+        )
+        .await
+        {
+            panic!(
+                "process_distribute_client_handling_handle did not complete in time, error: {}",
+                timeout_error
+            );
+        }
+
+        // disconnect the last internal client message sender
+        internal_client_message_sender_1.disconnect();
+        internal_client_message_sender_2.disconnect();
+        internal_client_message_sender_3.disconnect();
+
+        // The server message receiver should be shutdown, and should return
+        // nothing further
+        assert_eq!(server_message_receiver_1.next().await, None);
+        assert_eq!(server_message_receiver_2.next().await, None);
+        assert_eq!(server_message_receiver_3.next().await, None);
+
+        // Join the async task.
+ if let Err(timeout_error) = async_std::io::timeout( + Duration::from_millis(200), + process_client_handling_stream_handle.map(Ok), + ) + .await + { + panic!( + "process_client_handling_stream_handle did not complete in time, error: {}", + timeout_error + ); + } + } + + #[async_std::test] + async fn test_process_client_handling_stream_subscribe_voters() { + let (_, _, _, data_state) = create_test_data_state(); + let client_thread_state = Arc::new(RwLock::new(create_test_client_thread_state())); + let data_state = Arc::new(RwLock::new(data_state)); + + let (mut voters_sender, voters_receiver) = mpsc::channel(1); + let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); + let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); + let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); + let (server_message_sender_3, mut server_message_receiver_3) = mpsc::channel(1); + let process_client_handling_stream_handle = + async_std::task::spawn(process_internal_client_message_stream( + internal_client_message_receiver, + data_state.clone(), + client_thread_state.clone(), + )); + + let process_distribute_voters_handle = async_std::task::spawn( + process_distribute_voters_handling_stream(client_thread_state, voters_receiver), + ); // Send a Connected Message to the server + let mut internal_client_message_sender_1 = internal_client_message_sender.clone(); + assert_eq!( + internal_client_message_sender_1 + .send(InternalClientMessage::Connected(server_message_sender_1)) + .await, + Ok(()) + ); + + assert_eq!( + server_message_receiver_1.next().await, + Some(ServerMessage::YouAre(ClientId::from_count(2))), + ); + + let client_1_id = ClientId::from_count(2); + let client_2_id = ClientId::from_count(3); + + // Send another Connected Message to the server + let mut internal_client_message_sender_2 = internal_client_message_sender.clone(); assert_eq!( - internal_client_message_sender - .send(InternalClientMessage::Connected(server_message_sender)), + internal_client_message_sender_2 + .send(InternalClientMessage::Connected(server_message_sender_2)) + .await, Ok(()) ); assert_eq!( - server_message_receiver.recv(), - Ok(ServerMessage::YouAre(ClientId::from_count(2))), + server_message_receiver_2.next().await, + Some(ServerMessage::YouAre(ClientId::from_count(3))), + ); + + // Send another Connected Message to the server + let mut internal_client_message_sender_3 = internal_client_message_sender; + assert_eq!( + internal_client_message_sender_3 + .send(InternalClientMessage::Connected(server_message_sender_3)) + .await, + Ok(()) + ); + + assert_eq!( + server_message_receiver_3.next().await, + Some(ServerMessage::YouAre(ClientId::from_count(4))), + ); + + assert_eq!( + internal_client_message_sender_1 + .send(InternalClientMessage::SubscribeNodeIdentity(client_1_id)) + .await, + Ok(()), + ); + + assert_eq!( + internal_client_message_sender_1 + .send(InternalClientMessage::SubscribeNodeIdentity(client_2_id)) + .await, + Ok(()), + ); + + // No response expected from the client messages at the moment. 
+
+        // send a new set of voters
+        let voters = BitVec::from_vec(vec![0x55]);
+        assert_eq!(voters_sender.send(voters.clone()).await, Ok(()));
+
+        // We should receive the latest voters on each subscribed client
+        assert_eq!(
+            server_message_receiver_1.next().await,
+            Some(ServerMessage::LatestVoters(voters.clone()))
+        );
+        assert_eq!(
+            server_message_receiver_2.next().await,
+            Some(ServerMessage::LatestVoters(voters.clone()))
+        );
+
+        // disconnect the voters sender
+        voters_sender.disconnect();
+
+        // Join the async task.
+        if let Err(timeout_error) = async_std::io::timeout(
+            Duration::from_millis(200),
+            process_distribute_voters_handle.map(Ok),
+        )
+        .await
+        {
+            panic!(
+                "process_distribute_voters_handle did not complete in time, error: {}",
+                timeout_error
+            );
+        }
+
+        // disconnect the last internal client message sender
+        internal_client_message_sender_1.disconnect();
+        internal_client_message_sender_2.disconnect();
+        internal_client_message_sender_3.disconnect();
+
+        // The server message receiver should be shutdown, and should return
+        // nothing further
+        assert_eq!(server_message_receiver_1.next().await, None);
+        assert_eq!(server_message_receiver_2.next().await, None);
+        assert_eq!(server_message_receiver_3.next().await, None);
+
+        // Join the async task.
+        if let Err(timeout_error) = async_std::io::timeout(
+            Duration::from_millis(200),
+            process_client_handling_stream_handle.map(Ok),
+        )
+        .await
+        {
+            panic!(
+                "process_client_handling_stream_handle did not complete in time, error: {}",
+                timeout_error
+            );
+        }
+    }
+
+    #[async_std::test]
+    async fn test_channel_assumption_1() {
+        let (sender, mut receiver) = mpsc::channel::<u64>(1);
+
+        drop(sender);
+
+        assert_eq!(receiver.next().await, None);
+    }
+
+    #[async_std::test]
+    async fn test_channel_assumption_2() {
+        let (sender, mut receiver) = mpsc::channel::<u64>(1);
+
+        let join_handle = async_std::task::spawn(async move { receiver.next().await });
+        drop(sender);
+
+        assert_eq!(join_handle.await, None);
+    }
+
+    #[async_std::test]
+    async fn test_channel_assumption_3() {
+        let (mut sender, receiver) = mpsc::channel(1);
+
+        drop(receiver);
+
+        assert_ne!(sender.send(1).await, Ok(()));
+    }
+
+    #[async_std::test]
+    async fn test_channel_assumption_4() {
+        let (mut sender, mut receiver) = mpsc::channel(1);
+
+        let join_handle = async_std::task::spawn(async move {
+            _ = sender.send(1).await;
+            async_std::task::sleep(Duration::from_millis(100)).await;
+            sender.send(2).await
+        });
+        async_std::task::sleep(Duration::from_millis(50)).await;
+        receiver.close();
+
+        assert_eq!(receiver.next().await, Some(1));
+        assert_eq!(receiver.next().await, None);
+        assert_ne!(join_handle.await, Ok(()));
+    }
+
+    #[async_std::test]
+    async fn test_timeout_assumption_1() {
+        assert_eq!(
+            async_std::future::timeout(std::time::Duration::from_millis(100), async move { 1u64 })
+                .await,
+            Ok(1u64)
+        );
+    }
+
+    #[async_std::test]
+    async fn test_timeout_assumption_2() {
+        assert_ne!(
+            async_std::future::timeout(
+                std::time::Duration::from_millis(100),
+                futures::future::pending::<u64>()
+            )
+            .await,
+            Ok(1u64)
+        );
+    }
+}
diff --git a/node-metrics/src/service/data_state/location_details.rs b/node-metrics/src/service/data_state/location_details.rs
new file mode 100644
index 000000000..dc01b6bd9
--- /dev/null
+++ b/node-metrics/src/service/data_state/location_details.rs
@@ -0,0 +1,95 @@
+use serde::{Deserialize, Serialize};
+
+/// [LocationDetails] represents the details of the location of the node.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct LocationDetails { + coords: (f64, f64), + country: String, +} + +impl LocationDetails { + pub fn new(coords: (f64, f64), country: String) -> Self { + Self { coords, country } + } + + pub fn coords(&self) -> (f64, f64) { + self.coords + } + + pub fn country(&self) -> &str { + &self.country + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_location_details_coords() { + let coords = (0.0, 0.0); + let country = "US".to_string(); + let location_details = LocationDetails::new(coords, country.clone()); + + assert_eq!(location_details.coords(), coords); + } + + #[test] + fn test_location_details_country() { + let coords = (0.0, 0.0); + let country = "US".to_string(); + let location_details = LocationDetails::new(coords, country.clone()); + + assert_eq!(location_details.country(), country); + } + + #[test] + fn test_location_details_eq() { + let coords = (0.0, 0.0); + let country = "US".to_string(); + let location_details = LocationDetails::new(coords, country.clone()); + let location_details_2 = LocationDetails::new(coords, country.clone()); + + assert_eq!(location_details, location_details_2); + } + + #[test] + fn test_location_details_debug() { + let coords = (0.0, 0.0); + let country = "US".to_string(); + let location_details = LocationDetails::new(coords, country.clone()); + + assert_eq!( + format!("{:?}", location_details), + format!( + "LocationDetails {{ coords: {:?}, country: {:?} }}", + coords, country + ) + ); + } + + #[test] + fn test_location_details_clone() { + let coords = (0.0, 0.0); + let country = "US".to_string(); + let location_details = LocationDetails::new(coords, country.clone()); + let cloned_location_details = location_details.clone(); + + assert_eq!(location_details, cloned_location_details); + } + + #[test] + #[cfg(feature = "testing")] + fn test_location_serialization() { + use serde_json; + + let coords = (1.2, 3.4); + let country = "US".to_string(); + let location_details = LocationDetails::new(coords, country.clone()); + + let serialized = serde_json::to_string(&location_details).unwrap(); + let deserialized: LocationDetails = serde_json::from_str(&serialized).unwrap(); + + assert_eq!(location_details, deserialized); + } +} diff --git a/node-metrics/src/service/data_state/mod.rs b/node-metrics/src/service/data_state/mod.rs index 8b7da10a0..5651ae836 100644 --- a/node-metrics/src/service/data_state/mod.rs +++ b/node-metrics/src/service/data_state/mod.rs @@ -1,6 +1,13 @@ +pub mod location_details; +pub mod node_identity; + +use async_std::sync::RwLock; use bitvec::vec::BitVec; use circular_buffer::CircularBuffer; -use futures::{Stream, StreamExt}; +use futures::{ + channel::mpsc::{SendError, Sender}, + SinkExt, Stream, StreamExt, +}; use hotshot_query_service::{ availability::QueryableHeader, explorer::{BlockDetail, ExplorerHeader, Timestamp}, @@ -16,102 +23,81 @@ use hotshot_types::{ BlockPayload, }, }; +pub use location_details::LocationDetails; +pub use node_identity::NodeIdentity; use sequencer::{Header, Payload, SeqTypes}; -use serde::{Deserialize, Serialize}; -use std::sync::mpsc::{Receiver, Sender}; -use std::{ - collections::HashSet, - iter::zip, - net::IpAddr, - sync::{Arc, RwLock}, -}; +use std::{collections::HashSet, iter::zip, sync::Arc}; use time::OffsetDateTime; /// MAX_HISTORY represents the last N records that are stored within the /// DataState structure for the various different sample types. 
const MAX_HISTORY: usize = 50; -/// [LocationDetails] represents the details of the location of the node. -#[derive(Clone, PartialEq, Debug, Serialize, Deserialize)] -pub struct LocationDetails { - coords: (f64, f64), - country: String, -} - -impl LocationDetails { - pub fn new(coords: (f64, f64), country: String) -> Self { - Self { coords, country } - } -} - -/// [NodeIdentity] represents the identity of the node that is participating -/// in the network. -#[derive(Clone, PartialEq, Debug, Serialize, Deserialize)] -pub struct NodeIdentity { - public_key: BLSPubKey, - name: String, - wallet_address: String, - ip_addresses: Vec, - company: String, - location: Option, - operating_system: String, - node_type: String, - network_type: String, +/// [DataState] represents the state of the data that is being stored within +/// the service. +#[cfg_attr(test, derive(Default))] +pub struct DataState { + latest_blocks: CircularBuffer>, + latest_voters: CircularBuffer, + stake_table: StakeTable, + // Do we need any other data at the moment? + node_identity: Vec<(BLSPubKey, NodeIdentity)>, } -impl NodeIdentity { +impl DataState { pub fn new( - public_key: BLSPubKey, - name: String, - wallet_address: String, - ip_addresses: Vec, - company: String, - location: Option, - operating_system: String, - node_type: String, - network_type: String, + latest_blocks: CircularBuffer>, + latest_voters: CircularBuffer, + stake_table: StakeTable, + node_identity: Vec<(BLSPubKey, NodeIdentity)>, ) -> Self { Self { - public_key, - name, - wallet_address, - ip_addresses, - company, - location, - operating_system, - node_type, - network_type, + latest_blocks, + latest_voters, + stake_table, + node_identity, } } - pub fn from_public_key(public_key: BLSPubKey) -> Self { - Self { - public_key, - name: String::new(), - wallet_address: String::new(), - ip_addresses: vec![], - company: String::new(), - location: None, - operating_system: String::new(), - node_type: String::new(), - network_type: String::new(), - } + pub fn latest_blocks(&self) -> impl Iterator> { + self.latest_blocks.iter() } -} -/// [DataState] represents the state of the data that is being stored within -/// the service. -pub struct DataState { - pub latest_blocks: CircularBuffer>, - pub latest_voters: CircularBuffer, - pub stake_table: StakeTable, - // Do we need any other data at the moment? - pub node_identity: Vec<(BLSPubKey, NodeIdentity)>, + pub fn latest_voters(&self) -> impl Iterator { + self.latest_voters.iter() + } + + pub fn stake_table(&self) -> &StakeTable { + &self.stake_table + } + + pub fn node_identity(&self) -> impl Iterator { + self.node_identity.iter() + } + + pub fn replace_stake_table( + &mut self, + stake_table: StakeTable, + ) { + self.stake_table = stake_table; + } + + pub fn add_latest_block(&mut self, block: BlockDetail) { + self.latest_blocks.push_back(block); + } + + pub fn add_latest_voters(&mut self, voters: BitVec) { + self.latest_voters.push_back(voters); + } + + pub fn add_node_identity(&mut self, identity: NodeIdentity) { + self.node_identity.push((*identity.public_key(), identity)); + } } /// [create_block_detail_from_leaf] is a helper function that will build a /// [BlockDetail] from the reference to [Leaf]. 
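/// No other state is consulted: the resulting detail is computed entirely
/// from the leaf's own header and payload.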
-fn create_block_detail_from_leaf(leaf: &Leaf) -> BlockDetail { +pub fn create_block_detail_from_leaf(leaf: &Leaf) -> BlockDetail { let block_header = leaf.block_header(); let block_payload = leaf.block_payload().unwrap_or(Payload::empty().0); @@ -132,11 +118,41 @@ fn create_block_detail_from_leaf(leaf: &Leaf) -> BlockDetail } } -fn process_incoming_leaf( +/// [ProcessLeafError] represents the error that can occur when processing +/// a [Leaf]. +#[derive(Debug)] +pub enum ProcessLeafError { + SendError(SendError), +} + +impl std::fmt::Display for ProcessLeafError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ProcessLeafError::SendError(err) => { + write!(f, "error sending block detail to sender: {}", err) + } + } + } +} + +impl std::error::Error for ProcessLeafError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + ProcessLeafError::SendError(err) => Some(err), + } + } +} + +/// [process_incoming_leaf] is a helper function that will process an incoming +/// [Leaf] and update the [DataState] with the new information. +/// Additionally, the block that is contained within the [Leaf] will be +/// computed into a [BlockDetail] and sent to the [Sender] so that it can be +/// processed for real-time considerations. +async fn process_incoming_leaf( leaf: Leaf, data_state: Arc>, - block_sender: Sender>, -) -> Result<(), ()> + mut block_sender: Sender>, +) -> Result<(), ProcessLeafError> where Header: BlockHeader + QueryableHeader + ExplorerHeader, Payload: BlockPayload, @@ -172,14 +188,7 @@ where // We will need to recompute these BitVecs if the node information that // is stored shrinks instead of growing. - let mut data_state_write_lock_guard = match data_state.write() { - Ok(guard) => guard, - Err(_) => { - // This lock is poisoned, and we won't ever be able to - // acquire it. So we should just exit here. 
- return Err(()); - } - }; + let mut data_state_write_lock_guard = data_state.write().await; let stake_table = &data_state_write_lock_guard.stake_table; let stable_table_entries_vec = stake_table @@ -206,11 +215,7 @@ where let voters_bitvec = data_state_write_lock_guard.node_identity.iter().fold( BitVec::with_capacity(data_state_write_lock_guard.node_identity.len()), |mut acc, key| { - if voters_set.contains(&key.0) { - acc.push(true); - } else { - acc.push(false); - } + acc.push(voters_set.contains(&key.0)); acc }, ); @@ -224,9 +229,9 @@ where drop(data_state_write_lock_guard); - if let Err(_) = block_sender.send(block_detail_copy) { + if let Err(err) = block_sender.send(block_detail_copy).await { // We have an error that prevents us from continuing - return Err(()); + return Err(ProcessLeafError::SendError(err)); } Ok(()) @@ -243,28 +248,114 @@ pub async fn process_leaf_stream( Header: BlockHeader + QueryableHeader + ExplorerHeader, Payload: BlockPayload, { - while let Some(leaf) = stream.next().await { - if let Err(_) = process_incoming_leaf(leaf, data_state.clone(), block_sender.clone()) { + loop { + let leaf_result = stream.next().await; + let leaf = if let Some(leaf) = leaf_result { + leaf + } else { + // We have reached the end of the stream + tracing::info!("process leaf stream: end of stream reached for leaf stream."); + return; + }; + + if let Err(err) = + process_incoming_leaf(leaf, data_state.clone(), block_sender.clone()).await + { // We have an error that prevents us from continuing + tracing::info!("process leaf stream: error processing leaf: {}", err); break; } } } -/// [process_leaf_thread] allows for the consumption of a [Receiver] when -/// attempting to process new incoming [Leaf]s. -pub fn process_leaf_thread( - receiver: Receiver>, - data_state: Arc>, - block_sender: Sender>, -) where - Header: BlockHeader + QueryableHeader + ExplorerHeader, - Payload: BlockPayload, -{ - while let Ok(leaf) = receiver.recv() { - if let Err(_) = process_incoming_leaf(leaf, data_state.clone(), block_sender.clone()) { - // We have an error that prevents us from continuing - break; +#[cfg(test)] +mod tests { + use super::{process_leaf_stream, DataState}; + use async_std::{prelude::FutureExt, sync::RwLock}; + use futures::{channel::mpsc, SinkExt, StreamExt}; + use sequencer::{ + state::{BlockMerkleTree, FeeMerkleTree}, + ChainConfig, Leaf, NodeState, ValidatedState, + }; + use std::{sync::Arc, time::Duration}; + + #[async_std::test] + async fn test_process_leaf_error_debug() { + let (mut sender, receiver) = mpsc::channel(1); + // deliberately close the receiver. + drop(receiver); + + // Attempt to receive, and we should get an error. 
+ let receive_result = sender.send(1).await; + + assert!(receive_result.is_err()); + let err = receive_result.unwrap_err(); + + let process_leaf_err = super::ProcessLeafError::SendError(err); + + assert_eq!( + format!("{:?}", process_leaf_err), + "SendError(SendError { kind: Disconnected })" + ); + } + + #[async_std::test] + async fn test_process_leaf_stream() { + let data_state: DataState = Default::default(); + let data_state = Arc::new(RwLock::new(data_state)); + let (block_sender, block_receiver) = futures::channel::mpsc::channel(1); + let (leaf_sender, leaf_receiver) = futures::channel::mpsc::channel(1); + + let process_leaf_stream_task_handle = async_std::task::spawn(process_leaf_stream( + leaf_receiver, + data_state.clone(), + block_sender, + )); + + { + let data_state = data_state.read().await; + // Latest blocks should be empty + assert_eq!(data_state.latest_blocks().count(), 0); + // Latest voters should be empty + assert_eq!(data_state.latest_voters().count(), 0); } + + let validated_state = ValidatedState { + block_merkle_tree: BlockMerkleTree::new(32), + fee_merkle_tree: FeeMerkleTree::new(32), + chain_config: ChainConfig::default().into(), + }; + let instance_state = NodeState::mock(); + + let sample_leaf = Leaf::genesis(&validated_state, &instance_state).await; + + let mut leaf_sender = leaf_sender; + // We should be able to send a leaf without issue + assert_eq!(leaf_sender.send(sample_leaf).await, Ok(()),); + + let mut block_receiver = block_receiver; + // We should receive a Block Detail. + + let next_block = block_receiver.next().await; + assert!(next_block.is_some()); + + { + let data_state = data_state.read().await; + // Latest blocks should now have a single entry + assert_eq!(data_state.latest_blocks().count(), 1); + // Latest voters should now have a single entry + assert_eq!(data_state.latest_voters().count(), 1); + } + + // We explicitly drop these, as it should make the task clean up. + drop(block_receiver); + drop(leaf_sender); + + assert_eq!( + process_leaf_stream_task_handle + .timeout(Duration::from_millis(200)) + .await, + Ok(()) + ); } } diff --git a/node-metrics/src/service/data_state/node_identity.rs b/node-metrics/src/service/data_state/node_identity.rs new file mode 100644 index 000000000..823ec25fe --- /dev/null +++ b/node-metrics/src/service/data_state/node_identity.rs @@ -0,0 +1,232 @@ +use super::LocationDetails; +use hotshot_types::signature_key::BLSPubKey; +use sequencer::state::FeeAccount; +use serde::{Deserialize, Serialize}; +use std::net::IpAddr; + +/// [NodeIdentity] represents the identity of the node that is participating +/// in the network. 
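+/// Apart from the public key, every field may be effectively empty;
+/// [NodeIdentity::from_public_key] builds an identity with default values
+/// for all of the remaining fields.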
+#[derive(Clone, PartialEq, Debug, Serialize, Deserialize)] +pub struct NodeIdentity { + public_key: BLSPubKey, + name: String, + wallet_address: FeeAccount, + ip_addresses: Vec, + company: String, + location: Option, + operating_system: String, + node_type: String, + network_type: String, +} + +impl NodeIdentity { + #[allow(clippy::too_many_arguments)] + pub fn new( + public_key: BLSPubKey, + name: String, + wallet_address: FeeAccount, + ip_addresses: Vec, + company: String, + location: Option, + operating_system: String, + node_type: String, + network_type: String, + ) -> Self { + Self { + public_key, + name, + wallet_address, + ip_addresses, + company, + location, + operating_system, + node_type, + network_type, + } + } + + pub fn public_key(&self) -> &BLSPubKey { + &self.public_key + } + + pub fn name(&self) -> &str { + &self.name + } + + pub fn wallet_address(&self) -> &FeeAccount { + &self.wallet_address + } + + pub fn ip_addresses(&self) -> &[IpAddr] { + &self.ip_addresses + } + + pub fn company(&self) -> &str { + &self.company + } + + pub fn location(&self) -> Option<&LocationDetails> { + self.location.as_ref() + } + + pub fn operating_system(&self) -> &str { + &self.operating_system + } + + pub fn node_type(&self) -> &str { + &self.node_type + } + + pub fn network_type(&self) -> &str { + &self.network_type + } + + pub fn from_public_key(public_key: BLSPubKey) -> Self { + Self { + public_key, + name: String::new(), + wallet_address: Default::default(), + ip_addresses: vec![], + company: String::new(), + location: None, + operating_system: String::new(), + node_type: String::new(), + network_type: String::new(), + } + } +} + +#[cfg(test)] +pub mod tests { + use super::LocationDetails; + use super::NodeIdentity; + use hotshot_types::signature_key::BLSPubKey; + use hotshot_types::traits::signature_key::SignatureKey; + use std::net::IpAddr; + use std::net::Ipv4Addr; + + pub fn create_test_node(index: u64) -> NodeIdentity { + let (pub_key, _) = BLSPubKey::generated_from_seed_indexed([0; 32], index); + + NodeIdentity::new( + pub_key, + "a".to_string(), + Default::default(), + vec![IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))], + "company".to_string(), + Some(LocationDetails::new((0.0, 0.0), "US".to_string())), + "Windows 11".to_string(), + "espresso".to_string(), + "residential".to_string(), + ) + } + + #[test] + fn test_node_identity_eq() { + let node_identity_1 = create_test_node(1); + let node_identity_2 = create_test_node(1); + let node_identity_3 = create_test_node(2); + + assert_eq!(node_identity_1, node_identity_2); + assert_ne!(node_identity_1, node_identity_3); + assert_ne!(node_identity_2, node_identity_3); + } + + #[test] + fn test_node_identity_eq_clone() { + let node_identity_1 = create_test_node(1); + let node_identity_2 = node_identity_1.clone(); + + assert_eq!(node_identity_1, node_identity_2); + } + + #[test] + #[cfg(feature = "testing")] + fn test_node_identity_serialize() { + use serde_json; + + let node_identity = create_test_node(1); + let serialized = serde_json::to_string(&node_identity).unwrap(); + let deserialized: NodeIdentity = serde_json::from_str(&serialized).unwrap(); + + assert_eq!(node_identity, deserialized); + } + + #[test] + fn test_node_identity_public_key() { + let node_identity = create_test_node(1); + let public_key = node_identity.public_key(); + + assert_eq!( + public_key, + &BLSPubKey::generated_from_seed_indexed([0; 32], 1).0 + ); + } + + #[test] + fn test_node_identity_name() { + let node_identity = create_test_node(1); + let name = 
node_identity.name(); + + assert_eq!(name, "a"); + } + + #[test] + fn test_node_identity_wallet_address() { + let node_identity = create_test_node(1); + let wallet_address = node_identity.wallet_address(); + + assert_eq!(wallet_address, &Default::default()); + } + + #[test] + fn test_node_identity_ip_addresses() { + let node_identity = create_test_node(1); + let ip_addresses = node_identity.ip_addresses(); + + assert_eq!(ip_addresses, &[IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))]); + } + + #[test] + fn test_node_identity_company() { + let node_identity = create_test_node(1); + let company = node_identity.company(); + + assert_eq!(company, "company"); + } + + #[test] + fn test_node_identity_location() { + let node_identity = create_test_node(1); + let location = node_identity.location(); + + assert_eq!( + location, + Some(&LocationDetails::new((0.0, 0.0), "US".to_string())) + ); + } + + #[test] + fn test_node_identity_operating_system() { + let node_identity = create_test_node(1); + let operating_system = node_identity.operating_system(); + + assert_eq!(operating_system, "Windows 11"); + } + + #[test] + fn test_node_identity_node_type() { + let node_identity = create_test_node(1); + let node_type = node_identity.node_type(); + + assert_eq!(node_type, "espresso"); + } + + #[test] + fn test_node_identity_network_type() { + let node_identity = create_test_node(1); + let network_type = node_identity.network_type(); + + assert_eq!(network_type, "residential"); + } +} diff --git a/node-metrics/src/service/server_message/mod.rs b/node-metrics/src/service/server_message/mod.rs index 2fe8f41f0..3b86b598a 100644 --- a/node-metrics/src/service/server_message/mod.rs +++ b/node-metrics/src/service/server_message/mod.rs @@ -1,4 +1,7 @@ -use super::client_id::ClientId; +use std::sync::Arc; + +use super::{client_id::ClientId, data_state::NodeIdentity}; +use bitvec::vec::BitVec; use hotshot_query_service::explorer::{BlockDetail, ExplorerHistograms}; use sequencer::SeqTypes; use serde::{Deserialize, Serialize}; @@ -12,35 +15,47 @@ pub enum ServerMessage { /// LatestBlock is a message that is meant to show the most recent block /// that has arrived. - LatestBlock(BlockDetail), + LatestBlock(Arc>), /// LatestNodeIdentity is a message that is meant to show the most recent /// node identity that has arrived. - LatestNodeIdentity, + LatestNodeIdentity(Arc), + + /// LatestVoters is a message that is meant to show the most recent + /// voters that have arrived. + LatestVoters(BitVec), /// BlocksSnapshot is a message that is sent in response to a request for /// the snapshot of block information that is available. - BlocksSnapshot(Vec>), + BlocksSnapshot(Arc>>), /// NodeIdentitySnapshot is a message that is sent in response to a request /// for the snapshot of the current node identity information. - NodeIdentitySnapshot, + NodeIdentitySnapshot(Arc>), /// HistogramSnapshot is a message that is sent in response to to a request /// for the snapshot of the current histogram information. - HistogramSnapshot(ExplorerHistograms), + HistogramSnapshot(Arc), + + /// VotersSnapshot is a message that is sent in response to a request for + /// the snapshot of the current voters information. 
+    VotersSnapshot(Arc<Vec<BitVec<u16>>>),
 }
 
 impl PartialEq for ServerMessage {
     fn eq(&self, other: &Self) -> bool {
         match (self, other) {
-            (Self::YouAre(l0), Self::YouAre(r0)) => l0 == r0,
-            (Self::LatestBlock(l0), Self::LatestBlock(r0)) => l0 == r0,
-            (Self::LatestNodeIdentity, Self::LatestNodeIdentity) => true,
-            (Self::BlocksSnapshot(l0), Self::BlocksSnapshot(r0)) => l0 == r0,
-            (Self::NodeIdentitySnapshot, Self::NodeIdentitySnapshot) => true,
+            (Self::YouAre(lhs), Self::YouAre(rhs)) => lhs == rhs,
+            (Self::LatestBlock(lhs), Self::LatestBlock(rhs)) => lhs == rhs,
+            (Self::LatestNodeIdentity(lhs), Self::LatestNodeIdentity(rhs)) => lhs == rhs,
+            (Self::BlocksSnapshot(lhs), Self::BlocksSnapshot(rhs)) => lhs == rhs,
+            (Self::NodeIdentitySnapshot(lhs), Self::NodeIdentitySnapshot(rhs)) => lhs == rhs,
             (Self::HistogramSnapshot(_), Self::HistogramSnapshot(_)) => false,
+            (Self::VotersSnapshot(lhs), Self::VotersSnapshot(rhs)) => lhs == rhs,
             _ => false,
         }
     }
 }
+
+#[cfg(test)]
+mod tests {}

From 61e136366d1202065cecd50bcba3345be4a39b68 Mon Sep 17 00:00:00 2001
From: Theodore Schnepper
Date: Mon, 8 Jul 2024 08:34:51 -0600
Subject: [PATCH 09/72] Rename and add comments to assumption tests

---
 node-metrics/src/service/client_state/mod.rs | 83 +++++++++++++++++---
 1 file changed, 70 insertions(+), 13 deletions(-)

diff --git a/node-metrics/src/service/client_state/mod.rs b/node-metrics/src/service/client_state/mod.rs
index f45711e74..09a24998d 100644
--- a/node-metrics/src/service/client_state/mod.rs
+++ b/node-metrics/src/service/client_state/mod.rs
@@ -1798,8 +1798,21 @@ pub mod tests {
         }
     }
 
+    // The following tests codify assumptions being made about the behavior of
+    // the Sender and Receivers provided by the futures library. The purpose
+    // of these tests is to document these assumptions, and add a test to
+    // ensure that they behave as expected. If they ever do not behave as
+    // expected, then the rest of this library will need to be modified to
+    // account for that change in behavior.
+
+    /// Tests the behavior of the sender and receiver when the sender is
+    /// dropped before the receiver is polled.
+    ///
+    /// This is a separate library test to ensure that the behavior that this
+    /// library is built on top of does not introduce a change that would
+    /// make this library no longer operate correctly.
     #[async_std::test]
-    async fn test_channel_assumption_1() {
+    async fn test_sender_receiver_behavior_drop_sender_before_receiver_polled_closes_receiver() {
         let (sender, mut receiver) = mpsc::channel::<u64>(1);
 
         drop(sender);
 
         assert_eq!(receiver.next().await, None);
     }
 
+    /// Tests the behavior of the sender and receiver when the sender is
+    /// dropped after the receiver is polled.
+    ///
+    /// This is a separate library test to ensure that the behavior that this
+    /// library is built on top of does not introduce a change that would
+    /// make this library no longer operate correctly.
     #[async_std::test]
-    async fn test_channel_assumption_2() {
+    async fn test_sender_receiver_behavior_drop_sender_after_receiver_polled_closes_receiver() {
         let (sender, mut receiver) = mpsc::channel::<u64>(1);
 
         let join_handle = async_std::task::spawn(async move { receiver.next().await });
+        async_std::task::sleep(Duration::from_millis(100)).await;
         drop(sender);
 
         assert_eq!(join_handle.await, None);
     }
 
+    /// Tests the behavior of the sender and receiver when the receiver is
+    /// dropped before anything is sent across the Sender.
+    ///
+    /// This is a separate library test to ensure that the behavior that this
+    /// library is built on top of does not introduce a change that would
+    /// make this library no longer operate correctly.
     #[async_std::test]
-    async fn test_channel_assumption_3() {
+    async fn test_sender_receiver_behavior_drop_receiver_before_sender_sends() {
         let (mut sender, receiver) = mpsc::channel(1);
 
         drop(receiver);
@@ -1826,8 +1852,14 @@
         assert_ne!(sender.send(1).await, Ok(()));
     }
 
+    /// Tests the behavior of the sender and receiver when the receiver is
+    /// dropped after the sender has sent a value.
+    ///
+    /// This is a separate library test to ensure that the behavior that this
+    /// library is built on top of does not introduce a change that would
+    /// make this library no longer operate correctly.
     #[async_std::test]
-    async fn test_channel_assumption_4() {
+    async fn test_sender_receiver_behavior_drop_receiver_after_sender_sends() {
         let (mut sender, mut receiver) = mpsc::channel(1);
 
         let join_handle = async_std::task::spawn(async move {
@@ -1843,24 +1875,49 @@
         assert_ne!(join_handle.await, Ok(()));
     }
 
+    /// Tests to ensure that the timeout on an already ready future does not
+    /// cause the future to be dropped.
     #[async_std::test]
-    async fn test_timeout_assumption_1() {
+    async fn test_timeout_on_already_ready_future() {
         assert_eq!(
-            async_std::future::timeout(std::time::Duration::from_millis(100), async move { 1u64 })
-                .await,
+            futures::future::ready(1u64).timeout(Duration::ZERO).await,
             Ok(1u64)
         );
     }
 
+    /// Tests to ensure that the timeout on an async block that is already
+    /// ready resolves with its value when polled, rather than timing out.
+    #[async_std::test]
+    async fn test_timeout_on_async_block_resolves_when_polled() {
+        assert_eq!(async move { 1u64 }.timeout(Duration::ZERO).await, Ok(1u64),);
+
+        assert_eq!(
+            async move { 1u64 }
+                .timeout(Duration::from_millis(100))
+                .await,
+            Ok(1u64),
+        );
+    }
+
+    /// Tests to ensure that the timeout on a future that never resolves
+    /// causes the timeout to trigger rather than returning a value.
     #[async_std::test]
-    async fn test_timeout_assumption_2() {
+    async fn test_timeout_on_pending_future_times_out() {
         assert_ne!(
-            async_std::future::timeout(
-                std::time::Duration::from_millis(100),
-                futures::future::pending::<u64>()
-            )
-            .await,
+            async_std::future::timeout(Duration::ZERO, futures::future::pending::<u64>()).await,
            Ok(1u64)
         );
     }
+
+    /// Tests to ensure that bitvec values are directly comparable without
+    /// needing to worry about whether their instances point to the same memory.
+ #[test] + fn test_bitvec_is_comparable() { + let bitvec_1: BitVec = BitVec::from_vec(vec![0x55]); + let bitvec_2: BitVec = BitVec::from_vec(vec![0x55]); + let bitvec_3: BitVec = BitVec::from_vec(vec![0xAA]); + + assert_eq!(bitvec_1, bitvec_2); + assert_ne!(bitvec_1, bitvec_3); + } } From 9c97ec7be429f1893b1001a2caec891543782839 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Mon, 8 Jul 2024 08:36:10 -0600 Subject: [PATCH 10/72] Add specific errors for tide_disco::Error and surf_disco::Error --- node-metrics/src/api/node_validator/v0/mod.rs | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs index bcfd436d1..093e8c5d9 100644 --- a/node-metrics/src/api/node_validator/v0/mod.rs +++ b/node-metrics/src/api/node_validator/v0/mod.rs @@ -32,33 +32,44 @@ pub type Version01 = StaticVersion; pub const STATIC_VER_0_1: Version01 = StaticVersion {}; #[derive(Debug, Serialize, Deserialize)] -pub enum Error {} +pub enum Error { + UnhandledTideDisco(tide_disco::StatusCode, String), + UnhandledSurfDisco(surf_disco::StatusCode, String), +} impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Error") + match self { + Self::UnhandledSurfDisco(status, msg) => { + write!(f, "Unhandled Surf Disco Error: {} - {}", status, msg) + } + + Self::UnhandledTideDisco(status, msg) => { + write!(f, "Unhandled Tide Disco Error: {} - {}", status, msg) + } + } } } impl std::error::Error for Error {} impl tide_disco::Error for Error { - fn catch_all(_status: tide_disco::StatusCode, _msg: String) -> Self { - todo!() + fn catch_all(status: tide_disco::StatusCode, msg: String) -> Self { + Self::UnhandledTideDisco(status, msg) } fn status(&self) -> tide_disco::StatusCode { - todo!() + tide_disco::StatusCode::INTERNAL_SERVER_ERROR } } impl surf_disco::Error for Error { - fn catch_all(_status: surf_disco::StatusCode, _msg: String) -> Self { - todo!() + fn catch_all(status: surf_disco::StatusCode, msg: String) -> Self { + Self::UnhandledSurfDisco(status, msg) } fn status(&self) -> surf_disco::StatusCode { - todo!() + surf_disco::StatusCode::INTERNAL_SERVER_ERROR } } From 605ddee2714af8bce1d9e629670002c8dd719030 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Mon, 8 Jul 2024 08:42:21 -0600 Subject: [PATCH 11/72] Replace `async_std::io::timeout` with prelude FutureExt `.timeout` Rename some local variables in tests for better clarity --- node-metrics/src/service/client_state/mod.rs | 126 ++++++++----------- 1 file changed, 52 insertions(+), 74 deletions(-) diff --git a/node-metrics/src/service/client_state/mod.rs b/node-metrics/src/service/client_state/mod.rs index 09a24998d..6d0a4e6ce 100644 --- a/node-metrics/src/service/client_state/mod.rs +++ b/node-metrics/src/service/client_state/mod.rs @@ -984,9 +984,9 @@ pub mod tests { }, server_message::ServerMessage, }; - use async_std::sync::RwLock; + use async_std::{prelude::FutureExt, sync::RwLock}; use bitvec::vec::BitVec; - use futures::{channel::mpsc, FutureExt, SinkExt, StreamExt}; + use futures::{channel::mpsc, SinkExt, StreamExt}; use hotshot_types::{signature_key::BLSPubKey, traits::signature_key::SignatureKey}; use sequencer::{Leaf, NodeState, ValidatedState}; use std::{ @@ -1067,7 +1067,7 @@ pub mod tests { let (mut internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); - let process_client_handling_stream_handle: async_std::task::JoinHandle<()> = + let 
process_internal_client_message_handle: async_std::task::JoinHandle<()> = async_std::task::spawn(process_internal_client_message_stream( internal_client_message_receiver, data_state, @@ -1078,14 +1078,12 @@ pub mod tests { internal_client_message_sender.disconnect(); // Join the async task. - if let Err(timeout_error) = async_std::io::timeout( - Duration::from_millis(200), - process_client_handling_stream_handle.map(Ok), - ) - .await + if let Err(timeout_error) = process_internal_client_message_handle + .timeout(Duration::from_millis(200)) + .await { panic!( - "process_client_handling_stream_handle did not complete in time, error: {}", + "process_internal_client_message_handle did not complete in time, error: {}", timeout_error ); } @@ -1105,7 +1103,7 @@ pub mod tests { let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); - let process_client_handling_stream_handle = + let process_internal_client_message_handle = async_std::task::spawn(process_internal_client_message_stream( internal_client_message_receiver, data_state, @@ -1164,14 +1162,12 @@ pub mod tests { assert_eq!(server_message_receiver_1.next().await, None); assert_eq!(server_message_receiver_2.next().await, None); - if let Err(timeout_error) = async_std::io::timeout( - Duration::from_millis(200), - process_client_handling_stream_handle.map(Ok), - ) - .await + if let Err(timeout_error) = process_internal_client_message_handle + .timeout(Duration::from_millis(200)) + .await { panic!( - "process_client_handling_stream_handle did not complete in time, error: {}", + "process_internal_client_message_handle did not complete in time, error: {}", timeout_error ); } @@ -1194,7 +1190,7 @@ pub mod tests { let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); - let process_client_handling_stream_handle = + let process_internal_client_message_handle = async_std::task::spawn(process_internal_client_message_stream( internal_client_message_receiver, data_state, @@ -1252,14 +1248,12 @@ pub mod tests { assert_eq!(server_message_receiver_2.next().await, None); // Join the async task. 
- if let Err(timeout_error) = async_std::io::timeout( - Duration::from_millis(200), - process_client_handling_stream_handle.map(Ok), - ) - .await + if let Err(timeout_error) = process_internal_client_message_handle + .timeout(Duration::from_millis(200)) + .await { panic!( - "process_client_handling_stream_handle did not complete in time, error: {}", + "process_internal_client_message_handle did not complete in time, error: {}", timeout_error ); } @@ -1274,7 +1268,7 @@ pub mod tests { let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); - let process_client_handling_stream_handle: async_std::task::JoinHandle<()> = + let process_internal_client_message_handle: async_std::task::JoinHandle<()> = async_std::task::spawn(process_internal_client_message_stream( internal_client_message_receiver, data_state, @@ -1339,14 +1333,12 @@ pub mod tests { assert_eq!(server_message_receiver_2.next().await, None); // Join the async task. - if let Err(timeout_error) = async_std::io::timeout( - Duration::from_millis(200), - process_client_handling_stream_handle.map(Ok), - ) - .await + if let Err(timeout_error) = process_internal_client_message_handle + .timeout(Duration::from_millis(200)) + .await { panic!( - "process_client_handling_stream_handle did not complete in time, error: {}", + "process_internal_client_message_handle did not complete in time, error: {}", timeout_error ); } @@ -1364,14 +1356,14 @@ pub mod tests { let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); let (server_message_sender_3, mut server_message_receiver_3) = mpsc::channel(1); - let process_client_handling_stream_handle = + let process_internal_client_message_handle = async_std::task::spawn(process_internal_client_message_stream( internal_client_message_receiver, data_state.clone(), client_thread_state.clone(), )); - let process_distribute_client_handling_handle = + let process_distribute_block_detail_handle = async_std::task::spawn(process_distribute_block_detail_handling_stream( client_thread_state, block_detail_receiver, @@ -1465,11 +1457,9 @@ pub mod tests { leaf_sender.disconnect(); // Join the async task. - if let Err(timeout_error) = async_std::io::timeout( - Duration::from_millis(200), - process_leaf_stream_handle.map(Ok), - ) - .await + if let Err(timeout_error) = process_leaf_stream_handle + .timeout(Duration::from_millis(200)) + .await { panic!( "process_leaf_stream_handle did not complete in time, error: {}", @@ -1478,11 +1468,9 @@ pub mod tests { } // Join the async task. - if let Err(timeout_error) = async_std::io::timeout( - Duration::from_millis(200), - process_distribute_client_handling_handle.map(Ok), - ) - .await + if let Err(timeout_error) = process_distribute_block_detail_handle + .timeout(Duration::from_millis(200)) + .await { panic!( "process_distribute_client_handling_handle did not complete in time, error: {}", @@ -1502,14 +1490,12 @@ pub mod tests { assert_eq!(server_message_receiver_3.next().await, None); // Join the async task. 
- if let Err(timeout_error) = async_std::io::timeout( - Duration::from_millis(200), - process_client_handling_stream_handle.map(Ok), - ) - .await + if let Err(timeout_error) = process_internal_client_message_handle + .timeout(Duration::from_millis(200)) + .await { panic!( - "process_client_handling_stream_handle did not complete in time, error: {}", + "process_internal_client_message_handle did not complete in time, error: {}", timeout_error ); } @@ -1526,14 +1512,14 @@ pub mod tests { let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); let (server_message_sender_3, mut server_message_receiver_3) = mpsc::channel(1); - let process_client_handling_stream_handle = + let process_internal_client_message_handle = async_std::task::spawn(process_internal_client_message_stream( internal_client_message_receiver, data_state.clone(), client_thread_state.clone(), )); - let process_distribute_client_handling_handle = + let process_distribute_node_identity_handle = async_std::task::spawn(process_distribute_node_identity_handling_stream( client_thread_state, node_identity_receiver, @@ -1623,14 +1609,12 @@ pub mod tests { node_identity_sender.disconnect(); // Join the async task. - if let Err(timeout_error) = async_std::io::timeout( - Duration::from_millis(200), - process_distribute_client_handling_handle.map(Ok), - ) - .await + if let Err(timeout_error) = process_distribute_node_identity_handle + .timeout(Duration::from_millis(200)) + .await { panic!( - "process_distribute_client_handling_handle did not complete in time, error: {}", + "process_distribute_node_identity_handle did not complete in time, error: {}", timeout_error ); } @@ -1647,14 +1631,12 @@ pub mod tests { assert_eq!(server_message_receiver_3.next().await, None); // Join the async task. - if let Err(timeout_error) = async_std::io::timeout( - Duration::from_millis(200), - process_client_handling_stream_handle.map(Ok), - ) - .await + if let Err(timeout_error) = process_internal_client_message_handle + .timeout(Duration::from_millis(200)) + .await { panic!( - "process_client_handling_stream_handle did not complete in time, error: {}", + "process_internal_client_message_handle did not complete in time, error: {}", timeout_error ); } @@ -1671,7 +1653,7 @@ pub mod tests { let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); let (server_message_sender_3, mut server_message_receiver_3) = mpsc::channel(1); - let process_client_handling_stream_handle = + let process_internal_client_message_handle = async_std::task::spawn(process_internal_client_message_stream( internal_client_message_receiver, data_state.clone(), @@ -1761,11 +1743,9 @@ pub mod tests { voters_sender.disconnect(); // Join the async task. - if let Err(timeout_error) = async_std::io::timeout( - Duration::from_millis(200), - process_distribute_voters_handle.map(Ok), - ) - .await + if let Err(timeout_error) = process_distribute_voters_handle + .timeout(Duration::from_millis(200)) + .await { panic!( "process_distribute_voters_handle did not complete in time, error: {}", @@ -1785,14 +1765,12 @@ pub mod tests { assert_eq!(server_message_receiver_3.next().await, None); // Join the async task. 
- if let Err(timeout_error) = async_std::io::timeout( - Duration::from_millis(200), - process_client_handling_stream_handle.map(Ok), - ) - .await + if let Err(timeout_error) = process_internal_client_message_handle + .timeout(Duration::from_millis(200)) + .await { panic!( - "process_client_handling_stream_handle did not complete in time, error: {}", + "process_internal_client_message_handle did not complete in time, error: {}", timeout_error ); } From eccb8aed684ec6204a3afe2f890360627e20fcca Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Mon, 8 Jul 2024 08:43:26 -0600 Subject: [PATCH 12/72] Add voters Sender and Receiver processing to `process_leaf_stream` --- node-metrics/src/api/node_validator/v0/mod.rs | 3 ++- node-metrics/src/service/client_state/mod.rs | 18 ++++++++++++- node-metrics/src/service/data_state/mod.rs | 25 ++++++++++++++++--- 3 files changed, 41 insertions(+), 5 deletions(-) diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs index 093e8c5d9..673282871 100644 --- a/node-metrics/src/api/node_validator/v0/mod.rs +++ b/node-metrics/src/api/node_validator/v0/mod.rs @@ -354,7 +354,7 @@ mod tests { let (block_detail_sender, block_detail_receiver) = mpsc::channel(32); let (leaf_sender, leaf_receiver) = mpsc::channel(32); let (_node_identity_sender, node_identity_receiver) = mpsc::channel(32); - let (_voters_sender, voters_receiver) = mpsc::channel(32); + let (voters_sender, voters_receiver) = mpsc::channel(32); let _process_internal_client_message_handle = async_std::task::spawn(process_internal_client_message_stream( @@ -383,6 +383,7 @@ mod tests { leaf_receiver, data_state.clone(), block_detail_sender, + voters_sender, )); let _leaf_retriever_handle = async_std::task::spawn(async move { diff --git a/node-metrics/src/service/client_state/mod.rs b/node-metrics/src/service/client_state/mod.rs index 6d0a4e6ce..726822d30 100644 --- a/node-metrics/src/service/client_state/mod.rs +++ b/node-metrics/src/service/client_state/mod.rs @@ -1352,6 +1352,7 @@ pub mod tests { let (mut leaf_sender, leaf_receiver) = mpsc::channel(1); let (block_detail_sender, block_detail_receiver) = mpsc::channel(1); + let (voters_sender, voters_receiver) = mpsc::channel(1); let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); @@ -1365,14 +1366,19 @@ pub mod tests { let process_distribute_block_detail_handle = async_std::task::spawn(process_distribute_block_detail_handling_stream( - client_thread_state, + client_thread_state.clone(), block_detail_receiver, )); + let process_distribute_voters_handle = async_std::task::spawn( + process_distribute_voters_handling_stream(client_thread_state, voters_receiver), + ); + let process_leaf_stream_handle = async_std::task::spawn(process_leaf_stream( leaf_receiver, data_state, block_detail_sender, + voters_sender, )); // Send a Connected Message to the server @@ -1478,6 +1484,16 @@ pub mod tests { ); } + if let Err(timeout_error) = process_distribute_voters_handle + .timeout(Duration::from_millis(200)) + .await + { + panic!( + "process_distribute_voters_handle did not complete in time, error: {}", + timeout_error + ); + } + // disconnect the last internal client message sender internal_client_message_sender_1.disconnect(); internal_client_message_sender_2.disconnect(); diff --git 
a/node-metrics/src/service/data_state/mod.rs b/node-metrics/src/service/data_state/mod.rs index 5651ae836..155a8fb7a 100644 --- a/node-metrics/src/service/data_state/mod.rs +++ b/node-metrics/src/service/data_state/mod.rs @@ -152,6 +152,7 @@ async fn process_incoming_leaf( leaf: Leaf, data_state: Arc>, mut block_sender: Sender>, + mut voters_sender: Sender, ) -> Result<(), ProcessLeafError> where Header: BlockHeader + QueryableHeader + ExplorerHeader, @@ -225,7 +226,7 @@ where .push_back(block_detail); data_state_write_lock_guard .latest_voters - .push_back(voters_bitvec); + .push_back(voters_bitvec.clone()); drop(data_state_write_lock_guard); @@ -234,6 +235,11 @@ where return Err(ProcessLeafError::SendError(err)); } + if let Err(err) = voters_sender.send(voters_bitvec).await { + // We have an error that prevents us from continuing + return Err(ProcessLeafError::SendError(err)); + } + Ok(()) } @@ -243,6 +249,7 @@ pub async fn process_leaf_stream( mut stream: S, data_state: Arc>, block_sender: Sender>, + voters_senders: Sender, ) where S: Stream> + Unpin, Header: BlockHeader + QueryableHeader + ExplorerHeader, @@ -258,8 +265,13 @@ pub async fn process_leaf_stream( return; }; - if let Err(err) = - process_incoming_leaf(leaf, data_state.clone(), block_sender.clone()).await + if let Err(err) = process_incoming_leaf( + leaf, + data_state.clone(), + block_sender.clone(), + voters_senders.clone(), + ) + .await { // We have an error that prevents us from continuing tracing::info!("process leaf stream: error processing leaf: {}", err); @@ -304,12 +316,14 @@ mod tests { let data_state: DataState = Default::default(); let data_state = Arc::new(RwLock::new(data_state)); let (block_sender, block_receiver) = futures::channel::mpsc::channel(1); + let (voters_sender, voters_receiver) = futures::channel::mpsc::channel(1); let (leaf_sender, leaf_receiver) = futures::channel::mpsc::channel(1); let process_leaf_stream_task_handle = async_std::task::spawn(process_leaf_stream( leaf_receiver, data_state.clone(), block_sender, + voters_sender, )); { @@ -339,6 +353,11 @@ mod tests { let next_block = block_receiver.next().await; assert!(next_block.is_some()); + let mut voters_receiver = voters_receiver; + // We should receive a BitVec of voters. 
+ let next_voters = voters_receiver.next().await; + assert!(next_voters.is_some()); + { let data_state = data_state.read().await; // Latest blocks should now have a single entry From 6fb7fc2f12f362ebcebda5866a0595a78c879ea5 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Mon, 8 Jul 2024 08:44:07 -0600 Subject: [PATCH 13/72] Fix missing equality implementation for `LatestVoters` `ServerMessage` --- node-metrics/src/service/server_message/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/node-metrics/src/service/server_message/mod.rs b/node-metrics/src/service/server_message/mod.rs index 3b86b598a..87fafdf82 100644 --- a/node-metrics/src/service/server_message/mod.rs +++ b/node-metrics/src/service/server_message/mod.rs @@ -48,6 +48,7 @@ impl PartialEq for ServerMessage { (Self::YouAre(lhs), Self::YouAre(rhg)) => lhs == rhg, (Self::LatestBlock(lhs), Self::LatestBlock(rhs)) => lhs == rhs, (Self::LatestNodeIdentity(lhs), Self::LatestNodeIdentity(rhs)) => lhs == rhs, + (Self::LatestVoters(lhs), Self::LatestVoters(rhs)) => lhs == rhs, (Self::BlocksSnapshot(lhs), Self::BlocksSnapshot(rhs)) => lhs == rhs, (Self::NodeIdentitySnapshot(lhs), Self::NodeIdentitySnapshot(rhs)) => lhs == rhs, (Self::HistogramSnapshot(_), Self::HistogramSnapshot(_)) => false, From 0b414634f53434c64248b5e74f2a04e5ab77be6a Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Mon, 8 Jul 2024 08:44:58 -0600 Subject: [PATCH 14/72] Format and sort Cargo.toml dependencies --- node-metrics/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/node-metrics/Cargo.toml b/node-metrics/Cargo.toml index 22fccbded..99a32388c 100644 --- a/node-metrics/Cargo.toml +++ b/node-metrics/Cargo.toml @@ -17,14 +17,14 @@ futures = { workspace = true } hotshot-query-service = { workspace = true } hotshot-stake-table = { workspace = true } hotshot-types = { workspace = true } -sequencer = { path = "../sequencer"} +sequencer = { path = "../sequencer" } serde = { workspace = true } serde_json = { version = "^1.0.113", optional = true } surf-disco = { workspace = true } tide-disco = { version = "0.9.0" } time = { workspace = true } -tracing = { workspace = true } toml = { workspace = true } +tracing = { workspace = true } vbs = { workspace = true } # Dependencies for feature `testing` From 05014b86b2e549ee760d1364c30170773d8be6cd Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Mon, 8 Jul 2024 08:48:26 -0600 Subject: [PATCH 15/72] Remove unused lib types --- node-metrics/src/lib.rs | 27 --------------------------- node-metrics/src/test/mod.rs | 1 - 2 files changed, 28 deletions(-) delete mode 100644 node-metrics/src/test/mod.rs diff --git a/node-metrics/src/lib.rs b/node-metrics/src/lib.rs index 0e44f43d8..b091aa0b3 100644 --- a/node-metrics/src/lib.rs +++ b/node-metrics/src/lib.rs @@ -97,30 +97,3 @@ pub mod api; pub mod service; - -#[cfg(test)] -pub mod test; - -/// Storage is a general purpose trait that allows for the storage of -/// arbitrary data. This trait allows for the specification of the -/// Get result to be different than that of the Set result. This should -/// allow for a larger degree of flexibility when it comes to storing things. -pub trait Storage { - type Get; - type Set; - fn get(&self) -> Self::Get; - fn set(&mut self, value: Self::Set); -} - -/// KeyValueStorage is a general purpose trait that allows for the storage -/// of key value pairs. This trait allows for the specification of the -/// Key and Value types to be different. 
This should allow for a larger
-/// degree of flexibility when it comes to storing things.
-pub trait KeyValueStorage {
-    type Key: Eq;
-    type Value: Clone;
-    fn get(&self, key: &Self::Key) -> &Self::Value;
-    fn set(&mut self, key: &Self::Key, value: Self::Value);
-}
-
-pub struct NodeInformation {}
diff --git a/node-metrics/src/test/mod.rs b/node-metrics/src/test/mod.rs
deleted file mode 100644
index 8b1378917..000000000
--- a/node-metrics/src/test/mod.rs
+++ /dev/null
@@ -1 +0,0 @@
-

From 09b961b87fbbf3046a5a61655c7af139fd0e18ad Mon Sep 17 00:00:00 2001
From: Theodore Schnepper
Date: Mon, 8 Jul 2024 09:12:32 -0600
Subject: [PATCH 16/72] Relocate the leaf stream construction

There will be a need to be resilient and to recover from certain
classes of errors. As such, it is helpful to be able to create a Stream
from a given Client as needed. This will allow future changes to
re-create the stream in the event of a disconnection (which will
happen).
---
 node-metrics/src/api/node_validator/v0/mod.rs | 79 ++++++++++++++-----
 1 file changed, 58 insertions(+), 21 deletions(-)

diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs
index 673282871..6f803e015 100644
--- a/node-metrics/src/api/node_validator/v0/mod.rs
+++ b/node-metrics/src/api/node_validator/v0/mod.rs
@@ -284,9 +284,60 @@ where
     Ok(api)
 }

+/// [stream_leaves_from_hotshot_query_service] retrieves a stream of
+/// [sequencer::Leaf]s from the Hotshot Query Service. It expects a
+/// [current_block_height] to be provided so that it can determine the starting
+/// block height to begin streaming from. No matter what the value of
+/// [current_block_height] is the stream will always check what the latest
+/// block height is on the hotshot query service. It will then attempt to
+/// pull as few Leafs as it needs from the stream.
+pub async fn stream_leaves_from_hotshot_query_service( + current_block_height: Option, + client: surf_disco::Client, +) -> Result< + impl futures::Stream> + Unpin, + hotshot_query_service::Error, +> { + let block_height_result = client.get("status/block-height").send().await; + let block_height: u64 = match block_height_result { + Ok(block_height) => block_height, + Err(err) => { + tracing::info!("retrieve block height request failed: {}", err); + return Err(err); + } + }; + + let latest_block_start = block_height.saturating_sub(50); + let start_block_height = if let Some(known_height) = current_block_height { + std::cmp::min(known_height, latest_block_start) + } else { + latest_block_start + }; + + let leaves_stream_result = client + .socket(&format!( + "availability/stream/leaves/{}", + start_block_height + )) + .subscribe::() + .await; + + let leaves_stream = match leaves_stream_result { + Ok(leaves_stream) => leaves_stream, + Err(err) => { + tracing::info!("retrieve leaves stream failed: {}", err); + return Err(err); + } + }; + + Ok(leaves_stream) +} + #[cfg(test)] mod tests { - use super::{Error, StateClientMessageSender, Version01, STATIC_VER_0_1}; + use super::{ + stream_leaves_from_hotshot_query_service, Error, StateClientMessageSender, STATIC_VER_0_1, + }; use crate::service::{ client_id::ClientId, client_message::InternalClientMessage, @@ -303,7 +354,6 @@ mod tests { channel::mpsc::{self, Sender}, SinkExt, StreamExt, }; - use sequencer::Leaf; use std::sync::Arc; use tide_disco::App; @@ -316,6 +366,7 @@ mod tests { } #[async_std::test] + #[ignore] async fn test_api_creation() { let node_validator_api_result = super::define_api::(); @@ -389,34 +440,20 @@ mod tests { let _leaf_retriever_handle = async_std::task::spawn(async move { // Alright, let's get some leaves, bro - let client: surf_disco::Client = surf_disco::Client::new( + let client = surf_disco::Client::new( "https://query.cappuccino.testnet.espresso.network/v0" .parse() .unwrap(), ); - let block_height_result = client.get("status/block-height").send().await; - let block_height: u64 = if let Ok(block_height) = block_height_result { - block_height - } else { - tracing::info!("block height request failed"); - return; - }; - - let start_block_height = block_height.saturating_sub(50); - - let mut leaf_sender = leaf_sender; - let mut leaves = client - .socket(&format!( - "availability/stream/leaves/{}", - start_block_height - )) - .subscribe::() + let mut leaf_stream = stream_leaves_from_hotshot_query_service(None, client) .await .unwrap(); + let mut leaf_sender = leaf_sender; + loop { - let leaf_result = leaves.next().await; + let leaf_result = leaf_stream.next().await; let leaf = if let Some(Ok(leaf)) = leaf_result { leaf } else { From 3d611dc1214a6ed41b0dbc24e4922df5aafb649c Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Mon, 8 Jul 2024 15:41:38 -0600 Subject: [PATCH 17/72] Refactor NodeIdentity fields to be Optional --- node-metrics/src/service/client_state/mod.rs | 42 ++++----- .../src/service/data_state/node_identity.rs | 87 ++++++++++--------- 2 files changed, 66 insertions(+), 63 deletions(-) diff --git a/node-metrics/src/service/client_state/mod.rs b/node-metrics/src/service/client_state/mod.rs index 726822d30..bc526fcc2 100644 --- a/node-metrics/src/service/client_state/mod.rs +++ b/node-metrics/src/service/client_state/mod.rs @@ -1010,14 +1010,14 @@ pub mod tests { let (pub_key, _) = BLSPubKey::generated_from_seed_indexed([0; 32], 0); NodeIdentity::new( pub_key, - "a".to_string(), - Default::default(), - 
vec![IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))], - "company".to_string(), + Some("a".to_string()), + Some(Default::default()), + Some(vec![IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))]), + Some("company".to_string()), Some(LocationDetails::new((0.0, 0.0), "US".to_string())), - "Windows 11".to_string(), - "espresso".to_string(), - "residential".to_string(), + Some("Windows 11".to_string()), + Some("espresso".to_string()), + Some("residential".to_string()), ) }; @@ -1025,14 +1025,14 @@ pub mod tests { let (pub_key, _) = BLSPubKey::generated_from_seed_indexed([0; 32], 1); NodeIdentity::new( pub_key, - "b".to_string(), - Default::default(), - vec![IpAddr::V4(Ipv4Addr::new(127, 0, 0, 2))], - "company".to_string(), + Some("b".to_string()), + Some(Default::default()), + Some(vec![IpAddr::V4(Ipv4Addr::new(127, 0, 0, 2))]), + Some("company".to_string()), Some(LocationDetails::new((0.0, 0.0), "US".to_string())), - "Windows 11".to_string(), - "espresso".to_string(), - "residential".to_string(), + Some("Windows 11".to_string()), + Some("espresso".to_string()), + Some("residential".to_string()), ) }; @@ -1040,14 +1040,14 @@ pub mod tests { let (pub_key, _) = BLSPubKey::generated_from_seed_indexed([0; 32], 2); NodeIdentity::new( pub_key, - "b".to_string(), - Default::default(), - vec![IpAddr::V4(Ipv4Addr::new(127, 0, 0, 3))], - "company".to_string(), + Some("b".to_string()), + Some(Default::default()), + Some(vec![IpAddr::V4(Ipv4Addr::new(127, 0, 0, 3))]), + Some("company".to_string()), Some(LocationDetails::new((0.0, 0.0), "US".to_string())), - "Windows 11".to_string(), - "espresso".to_string(), - "residential".to_string(), + Some("Windows 11".to_string()), + Some("espresso".to_string()), + Some("residential".to_string()), ) }; diff --git a/node-metrics/src/service/data_state/node_identity.rs b/node-metrics/src/service/data_state/node_identity.rs index 823ec25fe..4a1c9806e 100644 --- a/node-metrics/src/service/data_state/node_identity.rs +++ b/node-metrics/src/service/data_state/node_identity.rs @@ -9,28 +9,28 @@ use std::net::IpAddr; #[derive(Clone, PartialEq, Debug, Serialize, Deserialize)] pub struct NodeIdentity { public_key: BLSPubKey, - name: String, - wallet_address: FeeAccount, - ip_addresses: Vec, - company: String, + name: Option, + wallet_address: Option, + ip_addresses: Option>, + company: Option, location: Option, - operating_system: String, - node_type: String, - network_type: String, + operating_system: Option, + node_type: Option, + network_type: Option, } impl NodeIdentity { #[allow(clippy::too_many_arguments)] pub fn new( public_key: BLSPubKey, - name: String, - wallet_address: FeeAccount, - ip_addresses: Vec, - company: String, + name: Option, + wallet_address: Option, + ip_addresses: Option>, + company: Option, location: Option, - operating_system: String, - node_type: String, - network_type: String, + operating_system: Option, + node_type: Option, + network_type: Option, ) -> Self { Self { public_key, @@ -49,19 +49,19 @@ impl NodeIdentity { &self.public_key } - pub fn name(&self) -> &str { + pub fn name(&self) -> &Option { &self.name } - pub fn wallet_address(&self) -> &FeeAccount { + pub fn wallet_address(&self) -> &Option { &self.wallet_address } - pub fn ip_addresses(&self) -> &[IpAddr] { + pub fn ip_addresses(&self) -> &Option> { &self.ip_addresses } - pub fn company(&self) -> &str { + pub fn company(&self) -> &Option { &self.company } @@ -69,29 +69,29 @@ impl NodeIdentity { self.location.as_ref() } - pub fn operating_system(&self) -> &str { + pub fn operating_system(&self) -> &Option 
{
         &self.operating_system
     }

-    pub fn node_type(&self) -> &str {
+    pub fn node_type(&self) -> &Option {
         &self.node_type
     }

-    pub fn network_type(&self) -> &str {
+    pub fn network_type(&self) -> &Option {
         &self.network_type
     }

     pub fn from_public_key(public_key: BLSPubKey) -> Self {
         Self {
             public_key,
-            name: String::new(),
-            wallet_address: Default::default(),
-            ip_addresses: vec![],
-            company: String::new(),
+            name: None,
+            wallet_address: None,
+            ip_addresses: None,
+            company: None,
             location: None,
-            operating_system: String::new(),
-            node_type: String::new(),
-            network_type: String::new(),
+            operating_system: None,
+            node_type: None,
+            network_type: None,
         }
     }
 }
@@ -110,14 +110,14 @@ pub mod tests {

         NodeIdentity::new(
             pub_key,
-            "a".to_string(),
-            Default::default(),
-            vec![IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))],
-            "company".to_string(),
+            Some("a".to_string()),
+            Some(Default::default()),
+            Some(vec![IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))]),
+            Some("company".to_string()),
             Some(LocationDetails::new((0.0, 0.0), "US".to_string())),
-            "Windows 11".to_string(),
-            "espresso".to_string(),
-            "residential".to_string(),
+            Some("Windows 11".to_string()),
+            Some("espresso".to_string()),
+            Some("residential".to_string()),
         )
     }

     #[test]
@@ -168,7 +168,7 @@ pub mod tests {
         let node_identity = create_test_node(1);

         let name = node_identity.name();
-        assert_eq!(name, "a");
+        assert_eq!(name, &Some("a".to_string()));
     }

     #[test]
@@ -176,7 +176,7 @@ pub mod tests {
         let node_identity = create_test_node(1);

         let wallet_address = node_identity.wallet_address();
-        assert_eq!(wallet_address, &Default::default());
+        assert_eq!(wallet_address, &Some(Default::default()));
     }

     #[test]
@@ -184,7 +184,10 @@ pub mod tests {
         let node_identity = create_test_node(1);

         let ip_addresses = node_identity.ip_addresses();
-        assert_eq!(ip_addresses, &[IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))]);
+        assert_eq!(
+            ip_addresses,
+            &Some(vec![IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))])
+        );
     }

     #[test]
@@ -192,7 +195,7 @@ pub mod tests {
         let node_identity = create_test_node(1);

         let company = node_identity.company();
-        assert_eq!(company, "company");
+        assert_eq!(company, &Some("company".to_string()));
     }

     #[test]
@@ -211,7 +214,7 @@ pub mod tests {
         let node_identity = create_test_node(1);

         let operating_system = node_identity.operating_system();
-        assert_eq!(operating_system, "Windows 11");
+        assert_eq!(operating_system, &Some("Windows 11".to_string()));
     }

     #[test]
@@ -219,7 +222,7 @@ pub mod tests {
         let node_identity = create_test_node(1);

         let node_type = node_identity.node_type();
-        assert_eq!(node_type, "espresso");
+        assert_eq!(node_type, &Some("espresso".to_string()));
     }

     #[test]
@@ -227,7 +230,7 @@ pub mod tests {
         let node_identity = create_test_node(1);

         let network_type = node_identity.network_type();
-        assert_eq!(network_type, "residential");
+        assert_eq!(network_type, &Some("residential".to_string()));
     }
 }

From d3b6924a7686bae0b70b880d35831bfcd34159cf Mon Sep 17 00:00:00 2001
From: Theodore Schnepper
Date: Mon, 8 Jul 2024 15:50:23 -0600
Subject: [PATCH 18/72] Refactor `BitVec` for voters to be u16

By default `BitVec`s are `usize` in their representation. This works
great for systems that are able to decode `u64` types from a JSON
representation, but for languages that have a unified number system,
such as JavaScript, these are not representable accurately when
decoded. In order to support these cases it is easier to just have the
`BitVec` represent its data in a format that is supported.
`u32` could be used here, but for maximum compatibility, I've chosen to use `u16` instead. --- node-metrics/src/service/client_state/mod.rs | 4 ++-- node-metrics/src/service/data_state/mod.rs | 12 ++++++------ node-metrics/src/service/server_message/mod.rs | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/node-metrics/src/service/client_state/mod.rs b/node-metrics/src/service/client_state/mod.rs index bc526fcc2..581fe3411 100644 --- a/node-metrics/src/service/client_state/mod.rs +++ b/node-metrics/src/service/client_state/mod.rs @@ -817,7 +817,7 @@ async fn handle_received_node_identity( /// subscribed to the voters stream. async fn handle_received_voters( client_thread_state: Arc>, - voters: BitVec, + voters: BitVec, ) { let client_thread_state_read_lock_guard = client_thread_state.read().await; @@ -951,7 +951,7 @@ pub async fn process_distribute_voters_handling_stream( client_thread_state: Arc>, mut stream: S, ) where - S: Stream + Unpin, + S: Stream> + Unpin, { loop { let voters_result = stream.next().await; diff --git a/node-metrics/src/service/data_state/mod.rs b/node-metrics/src/service/data_state/mod.rs index 155a8fb7a..3d7d24225 100644 --- a/node-metrics/src/service/data_state/mod.rs +++ b/node-metrics/src/service/data_state/mod.rs @@ -38,7 +38,7 @@ const MAX_HISTORY: usize = 50; #[cfg_attr(test, derive(Default))] pub struct DataState { latest_blocks: CircularBuffer>, - latest_voters: CircularBuffer, + latest_voters: CircularBuffer>, stake_table: StakeTable, // Do we need any other data at the moment? node_identity: Vec<(BLSPubKey, NodeIdentity)>, @@ -47,7 +47,7 @@ pub struct DataState { impl DataState { pub fn new( latest_blocks: CircularBuffer>, - latest_voters: CircularBuffer, + latest_voters: CircularBuffer>, stake_table: StakeTable, node_identity: Vec<(BLSPubKey, NodeIdentity)>, ) -> Self { @@ -63,7 +63,7 @@ impl DataState { self.latest_blocks.iter() } - pub fn latest_voters(&self) -> impl Iterator { + pub fn latest_voters(&self) -> impl Iterator> { self.latest_voters.iter() } @@ -86,7 +86,7 @@ impl DataState { self.latest_blocks.push_back(block); } - pub fn add_latest_voters(&mut self, voters: BitVec) { + pub fn add_latest_voters(&mut self, voters: BitVec) { self.latest_voters.push_back(voters); } @@ -152,7 +152,7 @@ async fn process_incoming_leaf( leaf: Leaf, data_state: Arc>, mut block_sender: Sender>, - mut voters_sender: Sender, + mut voters_sender: Sender>, ) -> Result<(), ProcessLeafError> where Header: BlockHeader + QueryableHeader + ExplorerHeader, @@ -249,7 +249,7 @@ pub async fn process_leaf_stream( mut stream: S, data_state: Arc>, block_sender: Sender>, - voters_senders: Sender, + voters_senders: Sender>, ) where S: Stream> + Unpin, Header: BlockHeader + QueryableHeader + ExplorerHeader, diff --git a/node-metrics/src/service/server_message/mod.rs b/node-metrics/src/service/server_message/mod.rs index 87fafdf82..5bc348c56 100644 --- a/node-metrics/src/service/server_message/mod.rs +++ b/node-metrics/src/service/server_message/mod.rs @@ -23,7 +23,7 @@ pub enum ServerMessage { /// LatestVoters is a message that is meant to show the most recent /// voters that have arrived. - LatestVoters(BitVec), + LatestVoters(BitVec), /// BlocksSnapshot is a message that is sent in response to a request for /// the snapshot of block information that is available. @@ -39,7 +39,7 @@ pub enum ServerMessage { /// VotersSnapshot is a message that is sent in response to a request for /// the snapshot of the current voters information. 
- VotersSnapshot(Arc>), + VotersSnapshot(Arc>>), } impl PartialEq for ServerMessage { From ae8f04188eb6af4626c2510bc999c93705c1f1ad Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Mon, 8 Jul 2024 15:53:05 -0600 Subject: [PATCH 19/72] Add structure for decoding StakeTable from sequencer In order to retrieve the StakeTable it is beneficial to have a function implementation to retrieve the StakeTable from a Sequencer. --- node-metrics/src/api/node_validator/v0/mod.rs | 75 +++++++++++++++++-- 1 file changed, 68 insertions(+), 7 deletions(-) diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs index 6f803e015..aa5a078b8 100644 --- a/node-metrics/src/api/node_validator/v0/mod.rs +++ b/node-metrics/src/api/node_validator/v0/mod.rs @@ -5,6 +5,11 @@ use futures::{ channel::mpsc::{self, Sender}, FutureExt, SinkExt, StreamExt, }; +use hotshot_stake_table::vec_based::StakeTable; +use hotshot_types::light_client::{CircuitField, StateVerKey}; +use hotshot_types::signature_key::BLSPubKey; +use hotshot_types::traits::{signature_key::StakeTableEntryType, stake_table::StakeTableScheme}; +use hotshot_types::PeerConfig; use serde::{Deserialize, Serialize}; use std::fmt; use tide_disco::socket::Connection; @@ -284,6 +289,55 @@ where Ok(api) } +#[derive(Debug, Deserialize)] +pub struct PublishHotShotConfig { + pub known_nodes_with_stake: Vec>, +} + +/// [get_stake_table_from_sequencer] retrieves the stake table from the +/// Sequencer. It expects a [surf_disco::Client] to be provided so that it can +/// make the request to the Hotshot Query Service. It will return a +/// [StakeTable] that is populated with the data retrieved from the Hotshot +/// Query Service. +pub async fn get_stake_table_from_sequencer( + client: surf_disco::Client, +) -> Result, hotshot_query_service::Error> { + let request = client + .get("config/hotshot") + // We need to set the Accept header, otherwise the Content-Type + // will be application/octet-stream, and we won't be able to + // deserialize the response. + .header("Accept", "application/json"); + let stake_table_result = request.send().await; + + let public_hot_shot_config: PublishHotShotConfig = match stake_table_result { + Ok(public_hot_shot_config) => public_hot_shot_config, + Err(err) => { + tracing::info!("retrieve stake table request failed: {}", err); + return Err(err); + } + }; + + let mut stake_table = StakeTable::::new( + public_hot_shot_config.known_nodes_with_stake.len(), + ); + + for node in public_hot_shot_config.known_nodes_with_stake.into_iter() { + stake_table + .register( + *node.stake_table_entry.key(), + node.stake_table_entry.stake(), + node.state_ver_key, + ) + .expect("registering stake table entry"); + } + + stake_table.advance(); + stake_table.advance(); + + Ok(stake_table) +} + /// [stream_leaves_from_hotshot_query_service] retrieves a stream of /// [sequencer::Leaf]s from the Hotshot Query Service. 
It expects a /// [current_block_height] to be provided so that it can determine the starting @@ -336,7 +390,8 @@ pub async fn stream_leaves_from_hotshot_query_service( #[cfg(test)] mod tests { use super::{ - stream_leaves_from_hotshot_query_service, Error, StateClientMessageSender, STATIC_VER_0_1, + get_stake_table_from_sequencer, stream_leaves_from_hotshot_query_service, Error, + StateClientMessageSender, STATIC_VER_0_1, }; use crate::service::{ client_id::ClientId, @@ -385,7 +440,7 @@ mod tests { panic!("Error: {:?}", e); } - let data_state = DataState::new( + let mut data_state = DataState::new( Default::default(), Default::default(), Default::default(), @@ -400,6 +455,16 @@ mod tests { ClientId::from_count(1), ); + let client = surf_disco::Client::new( + "https://query.cappuccino.testnet.espresso.network/v0" + .parse() + .unwrap(), + ); + + let get_stake_table_result = get_stake_table_from_sequencer(client.clone()).await; + let stake_table = get_stake_table_result.unwrap(); + data_state.replace_stake_table(stake_table); + let data_state = Arc::new(RwLock::new(data_state)); let client_thread_state = Arc::new(RwLock::new(client_thread_state)); let (block_detail_sender, block_detail_receiver) = mpsc::channel(32); @@ -440,11 +505,7 @@ mod tests { let _leaf_retriever_handle = async_std::task::spawn(async move { // Alright, let's get some leaves, bro - let client = surf_disco::Client::new( - "https://query.cappuccino.testnet.espresso.network/v0" - .parse() - .unwrap(), - ); + let client = client; let mut leaf_stream = stream_leaves_from_hotshot_query_service(None, client) .await From 371dd8e2b3639c66d47f3e34cf0611086b759b1a Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Mon, 8 Jul 2024 15:56:28 -0600 Subject: [PATCH 20/72] Refactor Node Identity storage in DataState The Node Identity stored in DataState was stored as a tuple of keys to `NodeIdentity`. However, `NodeIdentity` already stores the public key, so it is unnecessary, especially following the update where the `NodeIdentity` itself has every other field as being optional. --- node-metrics/src/service/client_state/mod.rs | 4 +- node-metrics/src/service/data_state/mod.rs | 59 ++++++++++++++++++-- 2 files changed, 55 insertions(+), 8 deletions(-) diff --git a/node-metrics/src/service/client_state/mod.rs b/node-metrics/src/service/client_state/mod.rs index 581fe3411..a878917a3 100644 --- a/node-metrics/src/service/client_state/mod.rs +++ b/node-metrics/src/service/client_state/mod.rs @@ -331,7 +331,7 @@ pub async fn handle_client_message_request_node_identity_snapshot( // Let's copy the current node identity snapshot and send them let nodes = data_state_read_lock_guard .node_identity() - .map(|(_, node)| node.clone()) + .cloned() .collect::>(); if let Err(err) = sender @@ -824,7 +824,7 @@ async fn handle_received_voters( // These are the clients who are subscribed to the node identities, that // have an active ClientState within the system. let node_identity_subscribers = client_thread_state_read_lock_guard - .subscribed_node_identity + .subscribed_voters .iter() .map(|client_id| { ( diff --git a/node-metrics/src/service/data_state/mod.rs b/node-metrics/src/service/data_state/mod.rs index 3d7d24225..91bc38725 100644 --- a/node-metrics/src/service/data_state/mod.rs +++ b/node-metrics/src/service/data_state/mod.rs @@ -41,7 +41,7 @@ pub struct DataState { latest_voters: CircularBuffer>, stake_table: StakeTable, // Do we need any other data at the moment? 
- node_identity: Vec<(BLSPubKey, NodeIdentity)>, + node_identity: Vec, } impl DataState { @@ -49,7 +49,7 @@ impl DataState { latest_blocks: CircularBuffer>, latest_voters: CircularBuffer>, stake_table: StakeTable, - node_identity: Vec<(BLSPubKey, NodeIdentity)>, + node_identity: Vec, ) -> Self { Self { latest_blocks, @@ -71,7 +71,7 @@ impl DataState { &self.stake_table } - pub fn node_identity(&self) -> impl Iterator { + pub fn node_identity(&self) -> impl Iterator { self.node_identity.iter() } @@ -80,6 +80,30 @@ impl DataState { stake_table: StakeTable, ) { self.stake_table = stake_table; + + // We want to make sure that we're accounting for this node identity + // information that we have. In the case of any new public keys + // being added, we want to ensure we have an entry for them in our + // node identity list. + + let current_identity_set = self + .node_identity + .iter() + .map(|node_identity| *node_identity.public_key()) + .collect::>(); + + let stake_table_iter_result = self.stake_table.try_iter(SnapshotVersion::Head); + let stake_table_iter = match stake_table_iter_result { + Ok(into_iter) => into_iter, + Err(_) => return, + }; + + let missing_node_identity_entries = + stake_table_iter.filter(|(key, _, _)| !current_identity_set.contains(key)); + + self.node_identity.extend( + missing_node_identity_entries.map(|(key, _, _)| NodeIdentity::from_public_key(key)), + ); } pub fn add_latest_block(&mut self, block: BlockDetail) { @@ -91,7 +115,30 @@ impl DataState { } pub fn add_node_identity(&mut self, identity: NodeIdentity) { - self.node_identity.push((*identity.public_key(), identity)); + // We need to check to see if this identity is already in the list, + // if it is, we will want to replace it. + + let pub_key = identity.public_key(); + + let mut matching_public_keys = self + .node_identity + .iter() + // We want the index of the entry for easier editing + .enumerate() + .filter(|(_, node_identity)| node_identity.public_key() == pub_key); + + // We only expect this have a single entry. + let existing_node_identity_option = matching_public_keys.next(); + + debug_assert_eq!(matching_public_keys.next(), None); + + if let Some((index, _)) = existing_node_identity_option { + self.node_identity[index] = identity; + return; + } + + // This entry doesn't appear in our table, so let's add it. + self.node_identity.push(identity); } } @@ -215,8 +262,8 @@ where let voters_bitvec = data_state_write_lock_guard.node_identity.iter().fold( BitVec::with_capacity(data_state_write_lock_guard.node_identity.len()), - |mut acc, key| { - acc.push(voters_set.contains(&key.0)); + |mut acc, node_identity| { + acc.push(voters_set.contains(node_identity.public_key())); acc }, ); From ede69941bb71879fa37dd2dea097d0291b7a088c Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Tue, 9 Jul 2024 15:46:39 -0600 Subject: [PATCH 21/72] Replace ip addresses in node identity with public url Due to the potential difficulty there can be in knowing your standing IP Address on cloud platforms, the requirement of knowing the ip address of a node has been replaced with a public url instead. This public url should be the base url for an API endpoint for the sequencer. 
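For illustration, a minimal sketch of the Option-based handling this
enables; the helper name and URLs below are made up for this example and
are not part of the patch (it assumes only the `url` crate, which the
workspace already uses):

    // Hypothetical helper: a missing or malformed URL simply leaves the
    // field unset, mirroring the Option-based fields on NodeIdentity.
    use url::Url;

    fn parse_public_url(raw: Option<&str>) -> Option<Url> {
        raw.and_then(|s| Url::parse(s).ok())
    }

    fn main() {
        assert!(parse_public_url(Some("https://sequencer.example/")).is_some());
        assert!(parse_public_url(Some("not a url")).is_none());
        assert!(parse_public_url(None).is_none());
    }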
--- node-metrics/src/service/client_state/mod.rs | 12 +++------ .../src/service/data_state/node_identity.rs | 26 +++++++++---------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/node-metrics/src/service/client_state/mod.rs b/node-metrics/src/service/client_state/mod.rs index a878917a3..b2de94893 100644 --- a/node-metrics/src/service/client_state/mod.rs +++ b/node-metrics/src/service/client_state/mod.rs @@ -989,11 +989,7 @@ pub mod tests { use futures::{channel::mpsc, SinkExt, StreamExt}; use hotshot_types::{signature_key::BLSPubKey, traits::signature_key::SignatureKey}; use sequencer::{Leaf, NodeState, ValidatedState}; - use std::{ - net::{IpAddr, Ipv4Addr}, - sync::Arc, - time::Duration, - }; + use std::{sync::Arc, time::Duration}; pub fn create_test_client_thread_state() -> ClientThreadState { ClientThreadState { @@ -1012,7 +1008,7 @@ pub mod tests { pub_key, Some("a".to_string()), Some(Default::default()), - Some(vec![IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))]), + Some("http://localhost/".parse().unwrap()), Some("company".to_string()), Some(LocationDetails::new((0.0, 0.0), "US".to_string())), Some("Windows 11".to_string()), @@ -1027,7 +1023,7 @@ pub mod tests { pub_key, Some("b".to_string()), Some(Default::default()), - Some(vec![IpAddr::V4(Ipv4Addr::new(127, 0, 0, 2))]), + Some("http://localhost/".parse().unwrap()), Some("company".to_string()), Some(LocationDetails::new((0.0, 0.0), "US".to_string())), Some("Windows 11".to_string()), @@ -1042,7 +1038,7 @@ pub mod tests { pub_key, Some("b".to_string()), Some(Default::default()), - Some(vec![IpAddr::V4(Ipv4Addr::new(127, 0, 0, 3))]), + Some("http://localhost/".parse().unwrap()), Some("company".to_string()), Some(LocationDetails::new((0.0, 0.0), "US".to_string())), Some("Windows 11".to_string()), diff --git a/node-metrics/src/service/data_state/node_identity.rs b/node-metrics/src/service/data_state/node_identity.rs index 4a1c9806e..d6270a07c 100644 --- a/node-metrics/src/service/data_state/node_identity.rs +++ b/node-metrics/src/service/data_state/node_identity.rs @@ -2,7 +2,7 @@ use super::LocationDetails; use hotshot_types::signature_key::BLSPubKey; use sequencer::state::FeeAccount; use serde::{Deserialize, Serialize}; -use std::net::IpAddr; +use surf_disco::Url; /// [NodeIdentity] represents the identity of the node that is participating /// in the network. 
@@ -11,7 +11,7 @@ pub struct NodeIdentity { public_key: BLSPubKey, name: Option, wallet_address: Option, - ip_addresses: Option>, + public_url: Option, company: Option, location: Option, operating_system: Option, @@ -25,7 +25,7 @@ impl NodeIdentity { public_key: BLSPubKey, name: Option, wallet_address: Option, - ip_addresses: Option>, + public_url: Option, company: Option, location: Option, operating_system: Option, @@ -36,7 +36,7 @@ impl NodeIdentity { public_key, name, wallet_address, - ip_addresses, + public_url, company, location, operating_system, @@ -57,8 +57,8 @@ impl NodeIdentity { &self.wallet_address } - pub fn ip_addresses(&self) -> &Option> { - &self.ip_addresses + pub fn public_url(&self) -> &Option { + &self.public_url } pub fn company(&self) -> &Option { @@ -86,7 +86,7 @@ impl NodeIdentity { public_key, name: None, wallet_address: None, - ip_addresses: None, + public_url: None, company: None, location: None, operating_system: None, @@ -102,8 +102,6 @@ pub mod tests { use super::NodeIdentity; use hotshot_types::signature_key::BLSPubKey; use hotshot_types::traits::signature_key::SignatureKey; - use std::net::IpAddr; - use std::net::Ipv4Addr; pub fn create_test_node(index: u64) -> NodeIdentity { let (pub_key, _) = BLSPubKey::generated_from_seed_indexed([0; 32], index); @@ -112,7 +110,7 @@ pub mod tests { pub_key, Some("a".to_string()), Some(Default::default()), - Some(vec![IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))]), + Some("https://espressosys.com/".parse().unwrap()), Some("company".to_string()), Some(LocationDetails::new((0.0, 0.0), "US".to_string())), Some("Windows 11".to_string()), @@ -180,13 +178,13 @@ pub mod tests { } #[test] - fn test_node_identity_ip_addresses() { + fn test_node_identity_public_url() { let node_identity = create_test_node(1); - let ip_addresses = node_identity.ip_addresses(); + let public_url = node_identity.public_url(); assert_eq!( - ip_addresses, - &Some(vec![IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))]) + public_url, + &Some("https://espressosys.com/".parse().unwrap()), ); } From 9717b2585a4e50c799f255bac372b2956375322b Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Thu, 11 Jul 2024 07:47:15 -0600 Subject: [PATCH 22/72] Add functions to parse Node Identity information from prometheus metrics We intended to be able to retrieve Node Identity information from the prometheus metrics. As such we need a function that can handle this interaction. Using the plugin for `prometheus-parse`, and `reqwest` we can retrieve the data given a valid base URL to work with. From there we can parse the data that is available in the resulting `Scrape` object to fill in the missing pieces of Node Identity information. 
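As a minimal, self-contained sketch of that flow (the metrics body and
metric name below are invented for illustration; the real code first
fetches `v0/status/metrics` over HTTP with `reqwest`):

    use std::io::BufRead;

    fn main() {
        // A tiny stand-in for a scraped metrics body.
        let body = "# HELP demo_identity node_identity_general\n\
                    # TYPE demo_identity gauge\n\
                    demo_identity{name=\"sequencer0\"} 1\n";

        // Scrape::parse consumes an iterator of io::Result<String> lines,
        // which BufRead::lines provides directly.
        let scrape = prometheus_parse::Scrape::parse(body.as_bytes().lines())
            .expect("well-formed metrics body");

        // Each sample exposes its labels, which is where the identity
        // information lives.
        let sample = scrape
            .samples
            .iter()
            .find(|sample| sample.metric == "demo_identity")
            .expect("sample present");
        assert_eq!(sample.labels.get("name"), Some("sequencer0"));
    }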
--- node-metrics/Cargo.toml | 4 + node-metrics/src/api/node_validator/v0/mod.rs | 379 +++++++++++++++++- node-metrics/src/service/client_state/mod.rs | 15 +- .../service/data_state/location_details.rs | 30 +- .../src/service/data_state/node_identity.rs | 28 +- 5 files changed, 426 insertions(+), 30 deletions(-) diff --git a/node-metrics/Cargo.toml b/node-metrics/Cargo.toml index 99a32388c..1c46e578f 100644 --- a/node-metrics/Cargo.toml +++ b/node-metrics/Cargo.toml @@ -17,14 +17,18 @@ futures = { workspace = true } hotshot-query-service = { workspace = true } hotshot-stake-table = { workspace = true } hotshot-types = { workspace = true } +prometheus-parse = { version = "^0.2.5" } +reqwest = { workspace = true } sequencer = { path = "../sequencer" } serde = { workspace = true } serde_json = { version = "^1.0.113", optional = true } surf-disco = { workspace = true } +tagged-base64 = { workspace = true } tide-disco = { version = "0.9.0" } time = { workspace = true } toml = { workspace = true } tracing = { workspace = true } +url = { workspace = true } vbs = { workspace = true } # Dependencies for feature `testing` diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs index aa5a078b8..80d968171 100644 --- a/node-metrics/src/api/node_validator/v0/mod.rs +++ b/node-metrics/src/api/node_validator/v0/mod.rs @@ -1,4 +1,5 @@ use crate::service::client_message::{ClientMessage, InternalClientMessage}; +use crate::service::data_state::{LocationDetails, NodeIdentity}; use crate::service::server_message::ServerMessage; use futures::future::Either; use futures::{ @@ -8,10 +9,13 @@ use futures::{ use hotshot_stake_table::vec_based::StakeTable; use hotshot_types::light_client::{CircuitField, StateVerKey}; use hotshot_types::signature_key::BLSPubKey; +use hotshot_types::traits::signature_key::SignatureKey; use hotshot_types::traits::{signature_key::StakeTableEntryType, stake_table::StakeTableScheme}; use hotshot_types::PeerConfig; +use prometheus_parse::Scrape; use serde::{Deserialize, Serialize}; use std::fmt; +use std::io::BufRead; use tide_disco::socket::Connection; use tide_disco::{api::ApiError, Api}; use vbs::version::{StaticVersion, StaticVersionType, Version}; @@ -338,6 +342,51 @@ pub async fn get_stake_table_from_sequencer( Ok(stake_table) } +pub enum GetNodeIdentityFromUrlError { + Url(url::ParseError), + Reqwest(reqwest::Error), + Io(std::io::Error), + NoNodeIdentity, +} + +impl From for GetNodeIdentityFromUrlError { + fn from(err: url::ParseError) -> Self { + GetNodeIdentityFromUrlError::Url(err) + } +} + +impl From for GetNodeIdentityFromUrlError { + fn from(err: reqwest::Error) -> Self { + GetNodeIdentityFromUrlError::Reqwest(err) + } +} + +impl From for GetNodeIdentityFromUrlError { + fn from(err: std::io::Error) -> Self { + GetNodeIdentityFromUrlError::Io(err) + } +} + +pub async fn get_node_identity_from_url( + url: url::Url, +) -> Result { + let client = reqwest::Client::new(); + + let completed_url = url.join("v0/status/metrics")?; + let request = client.get(completed_url).build()?; + let response = client.execute(request).await?; + let response_bytes = response.bytes().await?; + + let buffered_response = std::io::BufReader::new(&*response_bytes); + let scrape = prometheus_parse::Scrape::parse(buffered_response.lines())?; + + if let Some(node_identity) = node_identity_from_scrape(scrape) { + Ok(node_identity) + } else { + Err(GetNodeIdentityFromUrlError::NoNodeIdentity) + } +} + /// [stream_leaves_from_hotshot_query_service] retrieves a 
stream of /// [sequencer::Leaf]s from the Hotshot Query Service. It expects a /// [current_block_height] to be provided so that it can determine the starting @@ -387,6 +436,137 @@ pub async fn stream_leaves_from_hotshot_query_service( Ok(leaves_stream) } +pub fn populate_node_identity_from_scrape(node_identity: &mut NodeIdentity, scrape: Scrape) { + // Handle General Information Population + + // Determine the key for the "consensus_node_identity_general" sample + let node_identity_general_key = scrape + .docs + .iter() + .find(|(_, key)| key == &"node_identity_general") + .map(|(key, _)| key); + + if let Some(node_identity_general_key) = node_identity_general_key { + let node_identity_general_sample = scrape + .samples + .iter() + .find(|sample| &sample.metric == node_identity_general_key); + + if let Some(node_identity_general_sample) = node_identity_general_sample { + node_identity.name = node_identity_general_sample + .labels + .get("name") + .map(|s| s.into()); + node_identity.company = node_identity_general_sample + .labels + .get("company_name") + .map(|s| s.into()); + node_identity.network_type = node_identity_general_sample + .labels + .get("network_type") + .map(|s| s.into()); + node_identity.node_type = node_identity_general_sample + .labels + .get("node_type") + .map(|s| s.into()); + node_identity.operating_system = node_identity_general_sample + .labels + .get("operating_system") + .map(|s| s.into()); + + // node_identity.wallet_address = node_identity_general.labels.get("wallet").map(|s| s.into()); + } + } + + let node_identity_location_key = scrape + .docs + .iter() + .find(|(_, key)| key == &"node_identity_location") + .map(|(key, _)| key); + if let Some(node_identity_location_key) = node_identity_location_key { + let node_identity_location_sample = scrape + .samples + .iter() + .find(|sample| &sample.metric == node_identity_location_key); + + // We either have an existing location, or we'd potentially like to create + // one. + + if let Some(node_identity_location_sample) = node_identity_location_sample { + let mut location = node_identity + .location + .take() + .unwrap_or(LocationDetails::new(None, None)); + location.country = node_identity_location_sample + .labels + .get("country") + .map(|s| s.into()); + + let latitude = node_identity_location_sample + .labels + .get("latitude") + .map(|s| s.parse::()); + let longitude = node_identity_location_sample + .labels + .get("latitude") + .map(|s| s.parse::()); + + if let (Some(Ok(latitude)), Some(Ok(longitude))) = (latitude, longitude) { + location.coords = Some((latitude, longitude)); + } + + // Are there any details populated? + if location.country.is_some() || location.coords.is_some() { + node_identity.location = Some(location); + } else { + node_identity.location = None; + } + } + } +} + +pub fn node_identity_from_scrape(scrape: Scrape) -> Option { + let node_key = scrape + .docs + .iter() + .find(|(_, key)| key == &"node") + .map(|(key, _)| key); + + let node_key = node_key?; + + let node_sample = scrape + .samples + .iter() + .find(|sample| &sample.metric == node_key); + + let node_sample = node_sample?; + + let public_key_string = node_sample.labels.get("key")?; + + // create the Tagged Base 64 Public Key representation + let tagged_base64 = + if let Ok(tagged_base64) = tagged_base64::TaggedBase64::parse(public_key_string) { + tagged_base64 + } else { + return None; + }; + + // Now we can take those bytes and we can create a Public Key from them. 
+ let public_key = match BLSPubKey::from_bytes(tagged_base64.value().as_ref()) { + Ok(public_key) => public_key, + Err(err) => { + // We couldn't parse the public key, so we can't create a NodeIdentity. + tracing::info!("parsing public key failed: {}", err); + return None; + } + }; + + let mut node_identity = NodeIdentity::from_public_key(public_key); + populate_node_identity_from_scrape(&mut node_identity, scrape); + + Some(node_identity) +} + #[cfg(test)] mod tests { use super::{ @@ -409,7 +589,10 @@ mod tests { channel::mpsc::{self, Sender}, SinkExt, StreamExt, }; - use std::sync::Arc; + use std::{ + io::{BufRead, BufReader}, + sync::Arc, + }; use tide_disco::App; struct TestState(Sender); @@ -532,4 +715,198 @@ mod tests { let _app_serve_result = app.serve("0.0.0.0:9000", STATIC_VER_0_1).await; } + + fn example_prometheus_output() -> &'static str { + "# HELP consensus_cdn_num_failed_messages num_failed_messages +# TYPE consensus_cdn_num_failed_messages counter +consensus_cdn_num_failed_messages 0 +# HELP consensus_current_view current_view +# TYPE consensus_current_view gauge +consensus_current_view 7 +# HELP consensus_invalid_qc invalid_qc +# TYPE consensus_invalid_qc gauge +consensus_invalid_qc 0 +# HELP consensus_last_decided_time last_decided_time +# TYPE consensus_last_decided_time gauge +consensus_last_decided_time 1720537017 +# HELP consensus_last_decided_view last_decided_view +# TYPE consensus_last_decided_view gauge +consensus_last_decided_view 4 +# HELP consensus_last_synced_block_height last_synced_block_height +# TYPE consensus_last_synced_block_height gauge +consensus_last_synced_block_height 4 +# HELP consensus_libp2p_num_connected_peers num_connected_peers +# TYPE consensus_libp2p_num_connected_peers gauge +consensus_libp2p_num_connected_peers 4 +# HELP consensus_libp2p_num_failed_messages num_failed_messages +# TYPE consensus_libp2p_num_failed_messages counter +consensus_libp2p_num_failed_messages 0 +# HELP consensus_node node +# TYPE consensus_node gauge +consensus_node{key=\"BLS_VER_KEY~bQszS-QKYvUij2g20VqS8asttGSb95NrTu2PUj0uMh1CBUxNy1FqyPDjZqB29M7ZbjWqj79QkEOWkpga84AmDYUeTuWmy-0P1AdKHD3ehc-dKvei78BDj5USwXPJiDUlCxvYs_9rWYhagaq-5_LXENr78xel17spftNd5MA1Mw5U\"} 1 +# HELP consensus_node_identity_general node_identity_general +# TYPE consensus_node_identity_general gauge +consensus_node_identity_general{company_name=\"Espresso Systems\",name=\"sequencer0\",network_type=\"local\",node_type=\"espresso-sequencer 0.1\",operating_system=\"Linux 5.15.153.1\",wallet=\"0x00000000000000000000000000000000\"} 1 +# HELP consensus_node_identity_location node_identity_location +# TYPE consensus_node_identity_location gauge +consensus_node_identity_location{country=\"US\",latitude=\"-40.7128\",longitude=\"-74.0060\"} 1 +# HELP consensus_node_index node_index +# TYPE consensus_node_index gauge +consensus_node_index 4 +# HELP consensus_number_of_empty_blocks_proposed number_of_empty_blocks_proposed +# TYPE consensus_number_of_empty_blocks_proposed counter +consensus_number_of_empty_blocks_proposed 1 +# HELP consensus_number_of_timeouts number_of_timeouts +# TYPE consensus_number_of_timeouts counter +consensus_number_of_timeouts 0 +# HELP consensus_number_of_timeouts_as_leader number_of_timeouts_as_leader +# TYPE consensus_number_of_timeouts_as_leader counter +consensus_number_of_timeouts_as_leader 0 +# HELP consensus_number_of_views_per_decide_event number_of_views_per_decide_event +# TYPE consensus_number_of_views_per_decide_event histogram 
+consensus_number_of_views_per_decide_event_bucket{le=\"0.005\"} 0 +consensus_number_of_views_per_decide_event_bucket{le=\"0.01\"} 0 +consensus_number_of_views_per_decide_event_bucket{le=\"0.025\"} 0 +consensus_number_of_views_per_decide_event_bucket{le=\"0.05\"} 0 +consensus_number_of_views_per_decide_event_bucket{le=\"0.1\"} 0 +consensus_number_of_views_per_decide_event_bucket{le=\"0.25\"} 0 +consensus_number_of_views_per_decide_event_bucket{le=\"0.5\"} 0 +consensus_number_of_views_per_decide_event_bucket{le=\"1\"} 0 +consensus_number_of_views_per_decide_event_bucket{le=\"2.5\"} 0 +consensus_number_of_views_per_decide_event_bucket{le=\"5\"} 4 +consensus_number_of_views_per_decide_event_bucket{le=\"10\"} 4 +consensus_number_of_views_per_decide_event_bucket{le=\"+Inf\"} 4 +consensus_number_of_views_per_decide_event_sum 12 +consensus_number_of_views_per_decide_event_count 4 +# HELP consensus_number_of_views_since_last_decide number_of_views_since_last_decide +# TYPE consensus_number_of_views_since_last_decide gauge +consensus_number_of_views_since_last_decide 4 +# HELP consensus_outstanding_transactions outstanding_transactions +# TYPE consensus_outstanding_transactions gauge +consensus_outstanding_transactions 0 +# HELP consensus_outstanding_transactions_memory_size outstanding_transactions_memory_size +# TYPE consensus_outstanding_transactions_memory_size gauge +consensus_outstanding_transactions_memory_size 0 +# HELP consensus_version version +# TYPE consensus_version gauge +consensus_version{desc=\"20240701-15-gbd0957fd-dirty\",rev=\"bd0957fddad19caab010dc59e5a92bc1c95cbc07\",timestamp=\"1980-01-01T00:00:00.000000000Z\"} 1 +# HELP consensus_view_duration_as_leader view_duration_as_leader +# TYPE consensus_view_duration_as_leader histogram +consensus_view_duration_as_leader_bucket{le=\"0.005\"} 0 +consensus_view_duration_as_leader_bucket{le=\"0.01\"} 0 +consensus_view_duration_as_leader_bucket{le=\"0.025\"} 0 +consensus_view_duration_as_leader_bucket{le=\"0.05\"} 0 +consensus_view_duration_as_leader_bucket{le=\"0.1\"} 0 +consensus_view_duration_as_leader_bucket{le=\"0.25\"} 0 +consensus_view_duration_as_leader_bucket{le=\"0.5\"} 0 +consensus_view_duration_as_leader_bucket{le=\"1\"} 0 +consensus_view_duration_as_leader_bucket{le=\"2.5\"} 1 +consensus_view_duration_as_leader_bucket{le=\"5\"} 1 +consensus_view_duration_as_leader_bucket{le=\"10\"} 1 +consensus_view_duration_as_leader_bucket{le=\"+Inf\"} 1 +consensus_view_duration_as_leader_sum 2 +consensus_view_duration_as_leader_count 1" + } + + #[test] + fn test_prometheus_scraping_example() { + let example_input = example_prometheus_output(); + + let buffered_reader = BufReader::new(example_input.as_bytes()); + let lines = buffered_reader.lines(); + + let scrape_result = prometheus_parse::Scrape::parse(lines); + + assert!(scrape_result.is_ok()); + let scrape = scrape_result.unwrap(); + + let node_identity_general_key = scrape + .docs + .iter() + .find(|(_, key)| key == &"node_identity_general") + .map(|(key, _)| key); + let node_identity_location_key = scrape + .docs + .iter() + .find(|(_, key)| key == &"node_identity_location") + .map(|(key, _)| key); + + assert!(node_identity_general_key.is_some()); + assert!(node_identity_location_key.is_some()); + + let node_identity_general_key = node_identity_general_key.unwrap(); + let node_identity_location_key = node_identity_location_key.unwrap(); + + // Let's look for the general_info + let node_identity_general = scrape + .samples + .iter() + .find(|sample| &sample.metric == 
node_identity_general_key); + + let node_identity_location = scrape + .samples + .iter() + .find(|sample| &sample.metric == node_identity_location_key); + + assert!(node_identity_general.is_some()); + assert!(node_identity_location.is_some()); + + let node_identity_general = node_identity_general.unwrap(); + let node_identity_location = node_identity_location.unwrap(); + + assert_eq!( + node_identity_general.labels.get("company_name"), + Some("Espresso Systems") + ); + assert_eq!(node_identity_general.labels.get("name"), Some("sequencer0")); + assert_eq!( + node_identity_general.labels.get("network_type"), + Some("local") + ); + assert_eq!( + node_identity_general.labels.get("node_type"), + Some("espresso-sequencer 0.1") + ); + assert_eq!( + node_identity_general.labels.get("node_type"), + Some("espresso-sequencer 0.1") + ); + assert_eq!( + node_identity_general.labels.get("operating_system"), + Some("Linux 5.15.153.1") + ); + assert_eq!( + node_identity_general.labels.get("wallet"), + Some("0x00000000000000000000000000000000") + ); + + assert_eq!(node_identity_location.labels.get("country"), Some("US")); + assert_eq!( + node_identity_location.labels.get("latitude"), + Some("-40.7128") + ); + assert_eq!( + node_identity_location.labels.get("longitude"), + Some("-74.0060") + ); + + print!("{:?}", scrape); + } + + #[test] + fn test_node_identity_from_scrape() { + let example_input = example_prometheus_output(); + + let buffered_reader = BufReader::new(example_input.as_bytes()); + let lines = buffered_reader.lines(); + + let scrape_result = prometheus_parse::Scrape::parse(lines); + + assert!(scrape_result.is_ok()); + let scrape = scrape_result.unwrap(); + + let node_identity = super::node_identity_from_scrape(scrape); + + assert!(node_identity.is_some()); + } } diff --git a/node-metrics/src/service/client_state/mod.rs b/node-metrics/src/service/client_state/mod.rs index b2de94893..12f091792 100644 --- a/node-metrics/src/service/client_state/mod.rs +++ b/node-metrics/src/service/client_state/mod.rs @@ -1010,7 +1010,10 @@ pub mod tests { Some(Default::default()), Some("http://localhost/".parse().unwrap()), Some("company".to_string()), - Some(LocationDetails::new((0.0, 0.0), "US".to_string())), + Some(LocationDetails::new( + Some((0.0, 0.0)), + Some("US".to_string()), + )), Some("Windows 11".to_string()), Some("espresso".to_string()), Some("residential".to_string()), @@ -1025,7 +1028,10 @@ pub mod tests { Some(Default::default()), Some("http://localhost/".parse().unwrap()), Some("company".to_string()), - Some(LocationDetails::new((0.0, 0.0), "US".to_string())), + Some(LocationDetails::new( + Some((0.0, 0.0)), + Some("US".to_string()), + )), Some("Windows 11".to_string()), Some("espresso".to_string()), Some("residential".to_string()), @@ -1040,7 +1046,10 @@ pub mod tests { Some(Default::default()), Some("http://localhost/".parse().unwrap()), Some("company".to_string()), - Some(LocationDetails::new((0.0, 0.0), "US".to_string())), + Some(LocationDetails::new( + Some((0.0, 0.0)), + Some("US".to_string()), + )), Some("Windows 11".to_string()), Some("espresso".to_string()), Some("residential".to_string()), diff --git a/node-metrics/src/service/data_state/location_details.rs b/node-metrics/src/service/data_state/location_details.rs index dc01b6bd9..ebdca0453 100644 --- a/node-metrics/src/service/data_state/location_details.rs +++ b/node-metrics/src/service/data_state/location_details.rs @@ -3,20 +3,20 @@ use serde::{Deserialize, Serialize}; /// [LocationDetails] represents the details of the location 
of the node. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct LocationDetails { - coords: (f64, f64), - country: String, + pub coords: Option<(f64, f64)>, + pub country: Option, } impl LocationDetails { - pub fn new(coords: (f64, f64), country: String) -> Self { + pub fn new(coords: Option<(f64, f64)>, country: Option) -> Self { Self { coords, country } } - pub fn coords(&self) -> (f64, f64) { - self.coords + pub fn coords(&self) -> &Option<(f64, f64)> { + &self.coords } - pub fn country(&self) -> &str { + pub fn country(&self) -> &Option { &self.country } } @@ -29,26 +29,26 @@ mod tests { fn test_location_details_coords() { let coords = (0.0, 0.0); let country = "US".to_string(); - let location_details = LocationDetails::new(coords, country.clone()); + let location_details = LocationDetails::new(Some(coords), Some(country.clone())); - assert_eq!(location_details.coords(), coords); + assert_eq!(location_details.coords(), &Some(coords)); } #[test] fn test_location_details_country() { let coords = (0.0, 0.0); let country = "US".to_string(); - let location_details = LocationDetails::new(coords, country.clone()); + let location_details = LocationDetails::new(Some(coords), Some(country.clone())); - assert_eq!(location_details.country(), country); + assert_eq!(location_details.country(), &Some(country)); } #[test] fn test_location_details_eq() { let coords = (0.0, 0.0); let country = "US".to_string(); - let location_details = LocationDetails::new(coords, country.clone()); - let location_details_2 = LocationDetails::new(coords, country.clone()); + let location_details = LocationDetails::new(Some(coords), Some(country.clone())); + let location_details_2 = LocationDetails::new(Some(coords), Some(country.clone())); assert_eq!(location_details, location_details_2); } @@ -57,7 +57,7 @@ mod tests { fn test_location_details_debug() { let coords = (0.0, 0.0); let country = "US".to_string(); - let location_details = LocationDetails::new(coords, country.clone()); + let location_details = LocationDetails::new(Some(coords), Some(country.clone())); assert_eq!( format!("{:?}", location_details), @@ -72,7 +72,7 @@ mod tests { fn test_location_details_clone() { let coords = (0.0, 0.0); let country = "US".to_string(); - let location_details = LocationDetails::new(coords, country.clone()); + let location_details = LocationDetails::new(Some(coords), Some(country.clone())); let cloned_location_details = location_details.clone(); assert_eq!(location_details, cloned_location_details); @@ -85,7 +85,7 @@ mod tests { let coords = (1.2, 3.4); let country = "US".to_string(); - let location_details = LocationDetails::new(coords, country.clone()); + let location_details = LocationDetails::new(Some(coords), Some(country.clone())); let serialized = serde_json::to_string(&location_details).unwrap(); let deserialized: LocationDetails = serde_json::from_str(&serialized).unwrap(); diff --git a/node-metrics/src/service/data_state/node_identity.rs b/node-metrics/src/service/data_state/node_identity.rs index d6270a07c..5edd11fa4 100644 --- a/node-metrics/src/service/data_state/node_identity.rs +++ b/node-metrics/src/service/data_state/node_identity.rs @@ -8,15 +8,15 @@ use surf_disco::Url; /// in the network. 
#[derive(Clone, PartialEq, Debug, Serialize, Deserialize)] pub struct NodeIdentity { - public_key: BLSPubKey, - name: Option, - wallet_address: Option, - public_url: Option, - company: Option, - location: Option, - operating_system: Option, - node_type: Option, - network_type: Option, + pub(crate) public_key: BLSPubKey, + pub(crate) name: Option, + pub(crate) wallet_address: Option, + pub(crate) public_url: Option, + pub(crate) company: Option, + pub(crate) location: Option, + pub(crate) operating_system: Option, + pub(crate) node_type: Option, + pub(crate) network_type: Option, } impl NodeIdentity { @@ -112,7 +112,10 @@ pub mod tests { Some(Default::default()), Some("https://espressosys.com/".parse().unwrap()), Some("company".to_string()), - Some(LocationDetails::new((0.0, 0.0), "US".to_string())), + Some(LocationDetails::new( + Some((0.0, 0.0)), + Some("US".to_string()), + )), Some("Windows 11".to_string()), Some("espresso".to_string()), Some("residential".to_string()), @@ -203,7 +206,10 @@ pub mod tests { assert_eq!( location, - Some(&LocationDetails::new((0.0, 0.0), "US".to_string())) + Some(&LocationDetails::new( + Some((0.0, 0.0)), + Some("US".to_string()) + )) ); } From 83f81c89f893be745a4f15b15966b92cab171e7e Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Mon, 15 Jul 2024 13:30:58 -0600 Subject: [PATCH 23/72] Update Cargo.lock file for node-metrics --- Cargo.lock | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index cbb42cd0b..233f630d0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6451,14 +6451,18 @@ dependencies = [ "hotshot-stake-table", "hotshot-testing", "hotshot-types", + "prometheus-parse", + "reqwest 0.12.5", "sequencer", "serde", "serde_json", "surf-disco", + "tagged-base64", "tide-disco 0.9.0", "time 0.3.36", "toml", "tracing", + "url", "vbs", ] @@ -7318,6 +7322,18 @@ dependencies = [ "syn 2.0.70", ] +[[package]] +name = "prometheus-parse" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "811031bea65e5a401fb2e1f37d802cca6601e204ac463809a3189352d13b78a5" +dependencies = [ + "chrono", + "itertools 0.12.1", + "once_cell", + "regex", +] + [[package]] name = "proptest" version = "1.5.0" From 7d398d2b3c8e395e1bad7d1e41cd338432cf9c09 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Mon, 15 Jul 2024 12:52:27 -0600 Subject: [PATCH 24/72] Add parsing of wallet address A wallet address is able to be decoded from a string using the `FromStr` trait. 
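The label lookup yields an Option<Result<_>>, which calls for a three-way
match. A minimal stand-in for that handling (u64 replaces FeeAccount here
purely so the sketch compiles on its own):

    use std::str::FromStr;

    // The label may be absent, present but invalid, or valid; only the
    // last case populates the field.
    fn parse_label(label: Option<&str>) -> Option<u64> {
        match label.map(u64::from_str) {
            Some(Ok(value)) => Some(value),
            Some(Err(err)) => {
                eprintln!("parsing label failed: {}", err);
                None
            }
            None => None,
        }
    }

    fn main() {
        assert_eq!(parse_label(Some("42")), Some(42));
        assert_eq!(parse_label(Some("not-a-number")), None);
        assert_eq!(parse_label(None), None);
    }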
--- node-metrics/src/api/node_validator/v0/mod.rs | 45 ++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs index 80d968171..9b600f132 100644 --- a/node-metrics/src/api/node_validator/v0/mod.rs +++ b/node-metrics/src/api/node_validator/v0/mod.rs @@ -13,9 +13,11 @@ use hotshot_types::traits::signature_key::SignatureKey; use hotshot_types::traits::{signature_key::StakeTableEntryType, stake_table::StakeTableScheme}; use hotshot_types::PeerConfig; use prometheus_parse::Scrape; +use sequencer::state::FeeAccount; use serde::{Deserialize, Serialize}; use std::fmt; use std::io::BufRead; +use std::str::FromStr; use tide_disco::socket::Connection; use tide_disco::{api::ApiError, Api}; use vbs::version::{StaticVersion, StaticVersionType, Version}; @@ -473,8 +475,21 @@ pub fn populate_node_identity_from_scrape(node_identity: &mut NodeIdentity, scra .labels .get("operating_system") .map(|s| s.into()); + // Wallet Address + let parsed_wallet_address_result = node_identity_general_sample + .labels + .get("wallet") + .map(FeeAccount::from_str); - // node_identity.wallet_address = node_identity_general.labels.get("wallet").map(|s| s.into()); + match parsed_wallet_address_result { + Some(Ok(parsed_wallet_address)) => { + node_identity.wallet_address = Some(parsed_wallet_address); + } + Some(Err(err)) => { + tracing::info!("parsing wallet address failed: {}", err); + } + None => {} + } } } @@ -589,8 +604,10 @@ mod tests { channel::mpsc::{self, Sender}, SinkExt, StreamExt, }; + use sequencer::state::FeeAccount; use std::{ io::{BufRead, BufReader}, + str::FromStr, sync::Arc, }; use tide_disco::App; @@ -908,5 +925,31 @@ consensus_view_duration_as_leader_count 1" let node_identity = super::node_identity_from_scrape(scrape); assert!(node_identity.is_some()); + let node_identity = node_identity.unwrap(); + + assert_eq!( + node_identity.company(), + &Some("Espresso Systems".to_string()) + ); + assert_eq!(node_identity.name(), &Some("sequencer0".to_string())); + assert_eq!(node_identity.network_type(), &Some("local".to_string())); + assert_eq!( + node_identity.node_type(), + &Some("espresso-sequencer 0.1".to_string()) + ); + assert_eq!( + node_identity.operating_system(), + &Some("Linux 5.15.153.1".to_string()) + ); + assert_eq!( + node_identity.wallet_address(), + &Some(FeeAccount::from_str("0x00000000000000000000000000000000").unwrap()) + ); + + assert!(node_identity.location().is_some()); + let node_identity_location = node_identity.location().unwrap(); + + assert_eq!(node_identity_location.country(), &Some("US".to_string())); + assert_eq!(node_identity_location.coords, Some((-40.7128, -74.0060))); } } From 607cd43da5fa499b394cdb803bdb467921039d28 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Mon, 15 Jul 2024 12:52:51 -0600 Subject: [PATCH 25/72] Fix incorrect key used for longitude The key "latitude" is used for both latitude and longitude values. This corrects the mistake by using "longitude" for longitude. Fixes a copy-paste error.
--- node-metrics/src/api/node_validator/v0/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs index 9b600f132..300af846f 100644 --- a/node-metrics/src/api/node_validator/v0/mod.rs +++ b/node-metrics/src/api/node_validator/v0/mod.rs @@ -523,7 +523,7 @@ pub fn populate_node_identity_from_scrape(node_identity: &mut NodeIdentity, scra .map(|s| s.parse::<f64>()); let longitude = node_identity_location_sample .labels - .get("latitude") + .get("longitude") .map(|s| s.parse::<f64>()); if let (Some(Ok(latitude)), Some(Ok(longitude))) = (latitude, longitude) { From 1a520b9d79668f1cabf7520b6a071c335fdebbce Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Mon, 15 Jul 2024 12:54:23 -0600 Subject: [PATCH 26/72] Fix badly formatted Wallet address The wallet address is meant to be a 160-bit address, which is 20 bytes. So its hexadecimal representation should be 40 characters instead of the 32 that have been used up to this point. --- node-metrics/src/api/node_validator/v0/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs index 300af846f..0783c0a13 100644 --- a/node-metrics/src/api/node_validator/v0/mod.rs +++ b/node-metrics/src/api/node_validator/v0/mod.rs @@ -763,7 +763,7 @@ consensus_libp2p_num_failed_messages 0 consensus_node{key=\"BLS_VER_KEY~bQszS-QKYvUij2g20VqS8asttGSb95NrTu2PUj0uMh1CBUxNy1FqyPDjZqB29M7ZbjWqj79QkEOWkpga84AmDYUeTuWmy-0P1AdKHD3ehc-dKvei78BDj5USwXPJiDUlCxvYs_9rWYhagaq-5_LXENr78xel17spftNd5MA1Mw5U\"} 1 # HELP consensus_node_identity_general node_identity_general # TYPE consensus_node_identity_general gauge -consensus_node_identity_general{company_name=\"Espresso Systems\",name=\"sequencer0\",network_type=\"local\",node_type=\"espresso-sequencer 0.1\",operating_system=\"Linux 5.15.153.1\",wallet=\"0x00000000000000000000000000000000\"} 1 +consensus_node_identity_general{company_name=\"Espresso Systems\",name=\"sequencer0\",network_type=\"local\",node_type=\"espresso-sequencer 0.1\",operating_system=\"Linux 5.15.153.1\",wallet=\"0x0000000000000000000000000000000000000000\"} 1 # HELP consensus_node_identity_location node_identity_location # TYPE consensus_node_identity_location gauge consensus_node_identity_location{country=\"US\",latitude=\"-40.7128\",longitude=\"-74.0060\"} 1 @@ -894,7 +894,7 @@ consensus_view_duration_as_leader_count 1" ); assert_eq!( node_identity_general.labels.get("wallet"), - Some("0x00000000000000000000000000000000") + Some("0x0000000000000000000000000000000000000000") ); assert_eq!(node_identity_location.labels.get("country"), Some("US")); @@ -943,7 +943,7 @@ consensus_view_duration_as_leader_count 1" ); assert_eq!( node_identity.wallet_address(), - &Some(FeeAccount::from_str("0x00000000000000000000000000000000").unwrap()) + &Some(FeeAccount::from_str("0x0000000000000000000000000000000000000000").unwrap()) ); assert!(node_identity.location().is_some()); From edd7a9fa5dd4c282ee4eab404581bf24ad444251 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Mon, 15 Jul 2024 13:29:27 -0600 Subject: [PATCH 27/72] Refactor node identity population to occur in separate functions The logic for verifying the node identity in a Scrape, and then populating the data, is large and daunting to consume all at once. It's better to split these pieces into separate functions so that the related parsing logic stays together.
--- node-metrics/src/api/node_validator/v0/mod.rs | 214 ++++++++++++------ 1 file changed, 147 insertions(+), 67 deletions(-) diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs index 0783c0a13..77747cc4a 100644 --- a/node-metrics/src/api/node_validator/v0/mod.rs +++ b/node-metrics/src/api/node_validator/v0/mod.rs @@ -12,7 +12,7 @@ use hotshot_types::signature_key::BLSPubKey; use hotshot_types::traits::signature_key::SignatureKey; use hotshot_types::traits::{signature_key::StakeTableEntryType, stake_table::StakeTableScheme}; use hotshot_types::PeerConfig; -use prometheus_parse::Scrape; +use prometheus_parse::{Sample, Scrape}; use sequencer::state::FeeAccount; use serde::{Deserialize, Serialize}; use std::fmt; @@ -438,10 +438,153 @@ pub async fn stream_leaves_from_hotshot_query_service( Ok(leaves_stream) } +/// [populate_node_identity_general_from_scrape] populates the general +/// information of a [NodeIdentity] from a [Sample] that is expected to be +/// the "consensus_node_identity_general" sample. +fn populate_node_identity_general_from_scrape( + node_identity: &mut NodeIdentity, + node_identity_general_sample: &Sample, +) { + node_identity.name = node_identity_general_sample + .labels + .get("name") + .map(|s| s.into()); + node_identity.company = node_identity_general_sample + .labels + .get("company_name") + .map(|s| s.into()); + node_identity.network_type = node_identity_general_sample + .labels + .get("network_type") + .map(|s| s.into()); + node_identity.node_type = node_identity_general_sample + .labels + .get("node_type") + .map(|s| s.into()); + node_identity.operating_system = node_identity_general_sample + .labels + .get("operating_system") + .map(|s| s.into()); + // Wallet Address + let parsed_wallet_address_result = node_identity_general_sample + .labels + .get("wallet") + .map(FeeAccount::from_str); + + match parsed_wallet_address_result { + Some(Ok(parsed_wallet_address)) => { + node_identity.wallet_address = Some(parsed_wallet_address); + } + Some(Err(err)) => { + tracing::info!("parsing wallet address failed: {}", err); + } + None => {} + } +} + +/// [populate_node_location_from_scrape] populates the location information of a +/// [NodeIdentity] from a [Sample] that is expected to be the +/// "consensus_node_identity_location" sample. +fn populate_node_location_from_scrape( + node_identity: &mut NodeIdentity, + node_identity_location_sample: &Sample, +) { + let mut location = node_identity + .location + .take() + .unwrap_or(LocationDetails::new(None, None)); + location.country = node_identity_location_sample + .labels + .get("country") + .map(|s| s.into()); + + let latitude = node_identity_location_sample + .labels + .get("latitude") + .map(|s| s.parse::<f64>()); + let longitude = node_identity_location_sample + .labels + .get("longitude") + .map(|s| s.parse::<f64>()); + + if let (Some(Ok(latitude)), Some(Ok(longitude))) = (latitude, longitude) { + location.coords = Some((latitude, longitude)); + } + + // Are there any details populated? + if location.country.is_some() || location.coords.is_some() { + node_identity.location = Some(location); + } else { + node_identity.location = None; + } +} + +/// [populate_node_identity_from_scrape] populates a [NodeIdentity] from a +/// [Scrape] that is expected to contain the necessary information to populate +/// the [NodeIdentity].
pub fn populate_node_identity_from_scrape(node_identity: &mut NodeIdentity, scrape: Scrape) { // Handle General Information Population + // Let's verify that the scrape information contains and matches our node + // identity's public key. + { + let node_key = scrape + .docs + .iter() + .find(|(_, key)| key == &"node") + .map(|(key, _)| key); + + let node_key = if let Some(node_key) = node_key { + node_key + } else { + // We were unable to find the key for the public key on the metrics + // scrape result. + tracing::warn!("scrape result doesn't seem to contain 'node' key, preventing us from verifying the public key"); + return; + }; + + let node_sample = scrape + .samples + .iter() + .find(|sample| &sample.metric == node_key); + + let node_sample = if let Some(node_sample) = node_sample { + node_sample + } else { + // We were unable to find the sample for the public key on the metrics + // scrape result. + tracing::warn!("scrape result doesn't seem to contain 'node' sample, preventing us from verifying the public key. This is especially odd considering that we found the 'node' key already."); + return; + }; + + let public_key_string = node_sample.labels.get("key"); + + let public_key_from_scrape = if let Some(public_key_string) = public_key_string { + if let Ok(public_key) = BLSPubKey::from_str(public_key_string) { + public_key + } else { + // We were unable to parse the public key from the scrape result. + tracing::warn!( + "parsing public key failed, preventing us from verifying the public key" + ); + return; + } + } else { + // We were unable to find the public key in the scrape result. + tracing::warn!("scrape result doesn't seem to contain 'key' label in the 'node' sample, preventing us from verifying the public key. This is especially odd considering that we found the 'node' key and sample already."); + return; + }; + + if &public_key_from_scrape != node_identity.public_key() { + tracing::warn!("node identity public key doesn't match public key in scrape, are we hitting the wrong URL, or is it behind a load balancer between multiple nodes?"); + return; + } + + debug_assert_eq!(&public_key_from_scrape, node_identity.public_key()); + } + // Determine the key for the "consensus_node_identity_general" sample + // so we can populate the general information concerning node identity. 
let node_identity_general_key = scrape .docs .iter() @@ -455,44 +598,11 @@ pub fn populate_node_identity_from_scrape(node_identity: &mut NodeIdentity, scra .find(|sample| &sample.metric == node_identity_general_key); if let Some(node_identity_general_sample) = node_identity_general_sample { - node_identity.name = node_identity_general_sample - .labels - .get("name") - .map(|s| s.into()); - node_identity.company = node_identity_general_sample - .labels - .get("company_name") - .map(|s| s.into()); - node_identity.network_type = node_identity_general_sample - .labels - .get("network_type") - .map(|s| s.into()); - node_identity.node_type = node_identity_general_sample - .labels - .get("node_type") - .map(|s| s.into()); - node_identity.operating_system = node_identity_general_sample - .labels - .get("operating_system") - .map(|s| s.into()); - // Wallet Address - let parsed_wallet_address_result = node_identity_general_sample - .labels - .get("wallet") - .map(FeeAccount::from_str); - - match parsed_wallet_address_result { - Some(Ok(parsed_wallet_address)) => { - node_identity.wallet_address = Some(parsed_wallet_address); - } - Some(Err(err)) => { - tracing::info!("parsing wallet address failed: {}", err); - } - None => {} - } + populate_node_identity_general_from_scrape(node_identity, node_identity_general_sample); } } + // Lookup node identity location information, so we can populate it. let node_identity_location_key = scrape .docs .iter() @@ -504,38 +614,8 @@ pub fn populate_node_identity_from_scrape(node_identity: &mut NodeIdentity, scra .iter() .find(|sample| &sample.metric == node_identity_location_key); - // We either have an existing location, or we'd potentially like to create - // one. - if let Some(node_identity_location_sample) = node_identity_location_sample { - let mut location = node_identity - .location - .take() - .unwrap_or(LocationDetails::new(None, None)); - location.country = node_identity_location_sample - .labels - .get("country") - .map(|s| s.into()); - - let latitude = node_identity_location_sample - .labels - .get("latitude") - .map(|s| s.parse::<f64>()); - let longitude = node_identity_location_sample - .labels - .get("longitude") - .map(|s| s.parse::<f64>()); - - if let (Some(Ok(latitude)), Some(Ok(longitude))) = (latitude, longitude) { - location.coords = Some((latitude, longitude)); - } - - // Are there any details populated?
- if location.country.is_some() || location.coords.is_some() { - node_identity.location = Some(location); - } else { - node_identity.location = None; - } + populate_node_location_from_scrape(node_identity, node_identity_location_sample); } } } From 3707bc674198f8cc41661544c4d17cd27cd85dc4 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Tue, 16 Jul 2024 07:29:28 -0600 Subject: [PATCH 28/72] Fix LocationDetails Debug trait test --- node-metrics/src/service/data_state/location_details.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node-metrics/src/service/data_state/location_details.rs b/node-metrics/src/service/data_state/location_details.rs index ebdca0453..56304c00b 100644 --- a/node-metrics/src/service/data_state/location_details.rs +++ b/node-metrics/src/service/data_state/location_details.rs @@ -62,7 +62,7 @@ mod tests { assert_eq!( format!("{:?}", location_details), format!( - "LocationDetails {{ coords: {:?}, country: {:?} }}", + "LocationDetails {{ coords: Some({:?}), country: Some({:?}) }}", coords, country ) ); From f23bde28a8b08a536d0cb9a7b1835598c904a9d3 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Tue, 16 Jul 2024 07:35:33 -0600 Subject: [PATCH 29/72] Fix test_process_client_handling_stream_subscribe_voters test The test `test_process_client_handling_stream_subscribe_voters` never completes and just runs forever. The reason is a flaw in the construction of the test itself. The test is intended to check on the real-time submission of voters data, and the distribution of that data to the subscribers. However, the setup has the users subscribe to `NodeIdentity` updates instead of `Voters` updates. To fix the issue we just need to change the subscribe calls to refer to voters instead. --- node-metrics/src/service/client_state/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/node-metrics/src/service/client_state/mod.rs b/node-metrics/src/service/client_state/mod.rs index 12f091792..8004e58dd 100644 --- a/node-metrics/src/service/client_state/mod.rs +++ b/node-metrics/src/service/client_state/mod.rs @@ -1732,14 +1732,14 @@ pub mod tests { assert_eq!( internal_client_message_sender_1 - .send(InternalClientMessage::SubscribeNodeIdentity(client_1_id)) + .send(InternalClientMessage::SubscribeVoters(client_1_id)) .await, Ok(()), ); assert_eq!( internal_client_message_sender_1 - .send(InternalClientMessage::SubscribeNodeIdentity(client_2_id)) + .send(InternalClientMessage::SubscribeVoters(client_2_id)) .await, Ok(()), ); From 1bded48c217c17ea4a1cb19de297f61a47323154 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Tue, 16 Jul 2024 13:40:53 -0600 Subject: [PATCH 30/72] Add process_node_identity_stream We need to process a stream of node identity updates as they come in. This way we have a single location where we update the locally stored node identity state, and a single hand-off point for distributing real-time node identity updates to potential subscribers.
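A wiring sketch of the data flow this commit describes (channel names and buffer sizes are illustrative; `DataState`, `NodeIdentity`, and `process_node_identity_stream` are the items introduced in the diff below):

use std::sync::Arc;

use async_std::sync::RwLock;
use futures::channel::mpsc;

async fn wire_node_identity_pipeline(data_state: Arc<RwLock<DataState>>) {
    // Incoming identity updates land on this channel...
    let (update_sender, update_receiver) = mpsc::channel::<NodeIdentity>(32);
    // ...and every stored update is re-sent here for real-time subscribers.
    let (subscriber_sender, _subscriber_receiver) = mpsc::channel::<NodeIdentity>(32);

    let task = async_std::task::spawn(process_node_identity_stream(
        update_receiver,
        data_state,
        subscriber_sender,
    ));

    // Dropping the last update sender ends the stream, letting the task finish.
    drop(update_sender);
    task.await;
}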
--- node-metrics/src/service/data_state/mod.rs | 224 ++++++++++++++++++++- 1 file changed, 223 insertions(+), 1 deletion(-) diff --git a/node-metrics/src/service/data_state/mod.rs b/node-metrics/src/service/data_state/mod.rs index 91bc38725..0173e707b 100644 --- a/node-metrics/src/service/data_state/mod.rs +++ b/node-metrics/src/service/data_state/mod.rs @@ -327,16 +327,108 @@ pub async fn process_leaf_stream( } } +/// [ProcessNodeIdentityError] represents the error that can occur when processing +/// a [NodeIdentity]. +#[derive(Debug)] +pub enum ProcessNodeIdentityError { + SendError(SendError), +} + +impl std::fmt::Display for ProcessNodeIdentityError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ProcessNodeIdentityError::SendError(err) => { + write!(f, "error sending node identity to sender: {}", err) + } + } + } +} + +impl std::error::Error for ProcessNodeIdentityError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + ProcessNodeIdentityError::SendError(err) => Some(err), + } + } +} + +impl From<SendError> for ProcessNodeIdentityError { + fn from(err: SendError) -> Self { + ProcessNodeIdentityError::SendError(err) + } +} + +/// [process_incoming_node_identity] is a helper function that will process an +/// incoming [NodeIdentity] and update the [DataState] with the new information. +/// Additionally, the [NodeIdentity] will be sent to the [Sender] so that it can +/// be processed for real-time considerations. +async fn process_incoming_node_identity( + node_identity: NodeIdentity, + data_state: Arc<RwLock<DataState>>, + mut node_identity_sender: Sender<NodeIdentity>, +) -> Result<(), ProcessNodeIdentityError> { + let mut data_state_write_lock_guard = data_state.write().await; + data_state_write_lock_guard.add_node_identity(node_identity.clone()); + node_identity_sender.send(node_identity).await?; + + Ok(()) +} + +/// [process_node_identity_stream] allows for the consumption of a [Stream] when +/// attempting to process new incoming [NodeIdentity]s. +/// This function will process the incoming [NodeIdentity] and update the +/// [DataState] with the new information. +/// Additionally, the [NodeIdentity] will be sent to the [Sender] so that it can +/// be processed for real-time considerations. +pub async fn process_node_identity_stream<S>( + mut stream: S, + data_state: Arc<RwLock<DataState>>, + node_identity_sender: Sender<NodeIdentity>, +) where + S: Stream<Item = NodeIdentity> + Unpin, +{ + loop { + let node_identity_result = stream.next().await; + let node_identity = if let Some(node_identity) = node_identity_result { + node_identity + } else { + // We have reached the end of the stream + tracing::info!( + "process node identity stream: end of stream reached for node identity stream."
+ ); + return; + }; + + if let Err(err) = process_incoming_node_identity( + node_identity, + data_state.clone(), + node_identity_sender.clone(), + ) + .await + { + // We have an error that prevents us from continuing + tracing::info!( + "process node identity stream: error processing node identity: {}", + err + ); + break; + } + } } #[cfg(test)] mod tests { use super::{process_leaf_stream, DataState}; use crate::service::data_state::{process_node_identity_stream, LocationDetails, NodeIdentity}; use async_std::{prelude::FutureExt, sync::RwLock}; use futures::{channel::mpsc, SinkExt, StreamExt}; use hotshot_types::{signature_key::BLSPubKey, traits::signature_key::SignatureKey}; use sequencer::{ - state::{BlockMerkleTree, FeeMerkleTree}, + state::{BlockMerkleTree, FeeAccount, FeeMerkleTree}, ChainConfig, Leaf, NodeState, ValidatedState, }; use std::{sync::Arc, time::Duration}; use url::Url; #[async_std::test] async fn test_process_leaf_error_debug() { @@ -424,4 +516,134 @@ mod tests { Ok(()) ); } + + #[async_std::test] + async fn test_process_node_identity_stream() { + let data_state: DataState = Default::default(); + let data_state = Arc::new(RwLock::new(data_state)); + let (node_identity_sender_1, node_identity_receiver_1) = futures::channel::mpsc::channel(1); + let (node_identity_sender_2, node_identity_receiver_2) = futures::channel::mpsc::channel(1); + + let process_node_identity_task_handle = + async_std::task::spawn(process_node_identity_stream( + node_identity_receiver_1, + data_state.clone(), + node_identity_sender_2, + )); + + { + let data_state = data_state.read().await; + // The node identity list should start out empty + assert_eq!(data_state.node_identity().count(), 0); + } + + // Send a node update to the Stream + let public_key_1 = BLSPubKey::generated_from_seed_indexed([0; 32], 0).0; + let node_identity_1 = NodeIdentity::from_public_key(public_key_1); + + let mut node_identity_sender_1 = node_identity_sender_1; + let mut node_identity_receiver_2 = node_identity_receiver_2; + + assert_eq!( + node_identity_sender_1.send(node_identity_1.clone()).await, + Ok(()) + ); + + assert_eq!( + node_identity_receiver_2.next().await, + Some(node_identity_1.clone()) + ); + + { + let data_state = data_state.read().await; + // The node identity list should now have a single entry + assert_eq!(data_state.node_identity().count(), 1); + assert_eq!(data_state.node_identity().next(), Some(&node_identity_1)); + } + + // If we send the same node identity again, we should not have a new entry. + assert_eq!( + node_identity_sender_1.send(node_identity_1.clone()).await, + Ok(()) + ); + + assert_eq!( + node_identity_receiver_2.next().await, + Some(node_identity_1.clone()) + ); + + { + let data_state = data_state.read().await; + // The node identity list should still have a single entry + assert_eq!(data_state.node_identity().count(), 1); + assert_eq!(data_state.node_identity().next(), Some(&node_identity_1)); + } + + // If we send an update for that node instead, it should update the + // entry.
+ let node_identity_1 = NodeIdentity::new( + public_key_1, + Some("name".to_string()), + Some(FeeAccount::default()), + Some(Url::parse("https://example.com/").unwrap()), + Some("company".to_string()), + Some(LocationDetails::new( + Some((40.7128, -74.0060)), + Some("US".to_string()), + )), + Some("operating_system".to_string()), + Some("node_type".to_string()), + Some("network_type".to_string()), + ); + assert_eq!( + node_identity_sender_1.send(node_identity_1.clone()).await, + Ok(()) + ); + + assert_eq!( + node_identity_receiver_2.next().await, + Some(node_identity_1.clone()) + ); + + { + let data_state = data_state.read().await; + // The node identity list should still have a single entry + assert_eq!(data_state.node_identity().count(), 1); + assert_eq!(data_state.node_identity().next(), Some(&node_identity_1)); + } + + // If we send a new node identity, it should result in a new entry + + let public_key_2 = BLSPubKey::generated_from_seed_indexed([0; 32], 1).0; + let node_identity_2 = NodeIdentity::from_public_key(public_key_2); + + assert_eq!( + node_identity_sender_1.send(node_identity_2.clone()).await, + Ok(()) + ); + + assert_eq!( + node_identity_receiver_2.next().await, + Some(node_identity_2.clone()) + ); + + { + let data_state = data_state.read().await; + // The node identity list should now have two entries + assert_eq!(data_state.node_identity().count(), 2); + assert_eq!(data_state.node_identity().next(), Some(&node_identity_1)); + assert_eq!(data_state.node_identity().last(), Some(&node_identity_2)); + } + + // We explicitly drop these, as it should make the task clean up. + drop(node_identity_sender_1); + + assert_eq!( + process_node_identity_task_handle + .timeout(Duration::from_millis(200)) + .await, + Ok(()) + ); + } } From 339924d1ab37389183cef731a0e8e03bdfabdce0 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Tue, 16 Jul 2024 15:21:53 -0600 Subject: [PATCH 31/72] Add process_node_identity_url_stream The `process_node_identity_url_stream` function consumes a Stream of Urls to ultimately generate any decoded `NodeIdentity` information. Rename `test_api_creation` to `test_full_setup_example`. `test_full_setup_example` has been updated to connect the new parts of the service that allow for the parsing and relaying of node identity information.
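As a usage sketch (the URLs and buffer size are illustrative), the URL stream is just another channel feeding the scraper function introduced in the diff below:

use futures::{channel::mpsc, SinkExt};
use url::Url;

async fn feed_node_urls(mut url_sender: mpsc::Sender<Url>) {
    // Each URL is later scraped for a NodeIdentity; a bad URL is logged
    // and skipped by the consumer rather than ending the stream.
    for raw in ["http://localhost:24000/", "http://localhost:24001/"] {
        let url: Url = raw.parse().expect("static URL parses");
        if url_sender.send(url).await.is_err() {
            // The receiving end (the scraper task) has shut down.
            break;
        }
    }
}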
--- node-metrics/src/api/node_validator/v0/mod.rs | 118 ++++++++++++++++-- 1 file changed, 109 insertions(+), 9 deletions(-) diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs index 77747cc4a..c2b19ee49 100644 --- a/node-metrics/src/api/node_validator/v0/mod.rs +++ b/node-metrics/src/api/node_validator/v0/mod.rs @@ -2,6 +2,7 @@ use crate::service::client_message::{ClientMessage, InternalClientMessage}; use crate::service::data_state::{LocationDetails, NodeIdentity}; use crate::service::server_message::ServerMessage; use futures::future::Either; +use futures::Sink; use futures::{ channel::mpsc::{self, Sender}, FutureExt, SinkExt, StreamExt, }; @@ -20,6 +21,7 @@ use std::io::BufRead; use std::str::FromStr; use tide_disco::socket::Connection; use tide_disco::{api::ApiError, Api}; +use url::Url; use vbs::version::{StaticVersion, StaticVersionType, Version}; /// CONSTANT for protocol major version @@ -351,6 +353,17 @@ pub enum GetNodeIdentityFromUrlError { NoNodeIdentity, } +impl std::fmt::Display for GetNodeIdentityFromUrlError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + GetNodeIdentityFromUrlError::Url(err) => write!(f, "url: {}", err), + GetNodeIdentityFromUrlError::Reqwest(err) => write!(f, "reqwest error: {}", err), + GetNodeIdentityFromUrlError::Io(err) => write!(f, "io error: {}", err), + GetNodeIdentityFromUrlError::NoNodeIdentity => write!(f, "no node identity"), + } + } +} + impl From<url::ParseError> for GetNodeIdentityFromUrlError { fn from(err: url::ParseError) -> Self { GetNodeIdentityFromUrlError::Url(err) @@ -369,6 +382,12 @@ impl From<std::io::Error> for GetNodeIdentityFromUrlError { } } +/// [get_node_identity_from_url] retrieves a [NodeIdentity] from a URL. It +/// expects a [url::Url] to be provided so that it can make the request to the +/// Sequencer status metrics API. It will return a [NodeIdentity] that is +/// populated with the data retrieved from the Sequencer status metrics API. +/// If no [NodeIdentity] is found, it will return a +/// [GetNodeIdentityFromUrlError::NoNodeIdentity] error. pub async fn get_node_identity_from_url( url: url::Url, ) -> Result<NodeIdentity, GetNodeIdentityFromUrlError> { @@ -414,7 +414,7 @@ /// block height to begin streaming from. No matter what the value of /// [current_block_height] is the stream will always check what the latest /// block height is on the hotshot query service. It will then attempt to -/// pull as few Leafs as it needs from the stream. +/// pull as few Leaves as it needs from the stream. pub async fn stream_leaves_from_hotshot_query_service( current_block_height: Option<u64>, client: surf_disco::Client, @@ -620,6 +639,10 @@ pub fn populate_node_identity_from_scrape(node_identity: &mut NodeIdentity, scra } } +/// [node_identity_from_scrape] creates a [NodeIdentity] from a [Scrape]. It +/// expects the [Scrape] to contain the necessary information to populate the +/// [NodeIdentity]. If the [Scrape] doesn't contain the necessary information +/// to populate the [NodeIdentity], then it will return [None]. pub fn node_identity_from_scrape(scrape: Scrape) -> Option<NodeIdentity> { let node_key = scrape .docs @@ -662,11 +685,53 @@ pub fn node_identity_from_scrape(scrape: Scrape) -> Option<NodeIdentity> { Some(node_identity) } +/// [process_node_identity_url_stream] processes a stream of [Url]s that are +/// expected to contain a Node Identity. It will attempt to retrieve the Node +/// Identity from the [Url] and then send it to the [Sink] provided. If the +/// [Sink] is closed, then the function will return.
+pub async fn process_node_identity_url_stream<T, K>( + node_identity_url_stream: T, + node_identity_sink: K, +) where + T: futures::Stream<Item = Url> + Unpin, + K: Sink<NodeIdentity, Error = SendError> + Unpin, +{ + let mut node_identity_url_stream = node_identity_url_stream; + let mut node_identity_sender = node_identity_sink; + loop { + let node_identity_url_result = node_identity_url_stream.next().await; + let node_identity_url = match node_identity_url_result { + Some(node_identity_url) => node_identity_url, + None => { + tracing::info!("node identity url stream closed"); + return; + } + }; + + // Alright we have a new Url to try and scrape for a Node Identity. + // Let's attempt to do that. + let node_identity_result = get_node_identity_from_url(node_identity_url).await; + + let node_identity = match node_identity_result { + Ok(node_identity) => node_identity, + Err(err) => { + tracing::warn!("get node identity from url failed. bad base url?: {}", err); + continue; + } + }; + + let send_result = node_identity_sender.send(node_identity).await; + if let Err(err) = send_result { + tracing::info!("node identity sender closed: {}", err); + return; + } + } +} #[cfg(test)] mod tests { use super::{ - get_stake_table_from_sequencer, stream_leaves_from_hotshot_query_service, Error, - StateClientMessageSender, STATIC_VER_0_1, + get_stake_table_from_sequencer, process_node_identity_url_stream, + stream_leaves_from_hotshot_query_service, Error, StateClientMessageSender, STATIC_VER_0_1, }; use crate::service::{ client_id::ClientId, client_message::InternalClientMessage, client_state::{ process_distribute_block_detail_handling_stream, process_distribute_node_identity_handling_stream, process_distribute_voters_handling_stream, process_internal_client_message_stream, ClientThreadState, }, - data_state::{process_leaf_stream, DataState}, + data_state::{process_leaf_stream, process_node_identity_stream, DataState}, }; use async_std::sync::RwLock; use futures::{ channel::mpsc::{self, Sender}, SinkExt, StreamExt, }; use std::{ io::{BufRead, BufReader}, sync::Arc, }; use tide_disco::App; #[async_std::test] #[ignore] - async fn test_api_creation() { + async fn test_full_setup_example() { let node_validator_api_result = super::define_api::<TestState>(); let node_validator_api = match node_validator_api_result { Ok(node_validator_api) => node_validator_api, Err(err) => { panic!("error defining node validator api: {:?}", err); } }; let (sender, receiver) = mpsc::channel(32); let mut app: App<_, Error> = App::with_state(TestState(sender)); let register_result = app.register_module("node-validator", node_validator_api); if let Err(err) = register_result { panic!("error registering node validator api: {:?}", err); } let mut data_state = DataState::new( Default::default(), Default::default(), Default::default(), Default::default(), ); let client_thread_state = ClientThreadState::new( Default::default(), Default::default(), Default::default(), Default::default(), Default::default(), ); let client = surf_disco::Client::new( - "https://query.cappuccino.testnet.espresso.network/v0" - .parse() - .unwrap(), + // "https://query.cappuccino.testnet.espresso.network/v0" + "http://localhost:24000/v0".parse().unwrap(), ); let get_stake_table_result = get_stake_table_from_sequencer(client.clone()).await; let stake_table = get_stake_table_result.unwrap(); data_state.replace_stake_table(stake_table); let data_state = Arc::new(RwLock::new(data_state)); let client_thread_state = Arc::new(RwLock::new(client_thread_state)); let (block_detail_sender, block_detail_receiver) = mpsc::channel(32); let (leaf_sender, leaf_receiver) = mpsc::channel(32); - let (_node_identity_sender, node_identity_receiver) = mpsc::channel(32); + let (node_identity_sender_1, node_identity_receiver_1) = mpsc::channel(32); + let (node_identity_sender_2, node_identity_receiver_2) = mpsc::channel(32); let (voters_sender, voters_receiver) = mpsc::channel(32); + let (mut url_sender, url_receiver) = mpsc::channel(32); let _process_internal_client_message_handle = async_std::task::spawn(process_internal_client_message_stream( receiver, data_state.clone(), client_thread_state.clone(), )); let _process_distribute_block_detail_handle = async_std::task::spawn(process_distribute_block_detail_handling_stream( client_thread_state.clone(), block_detail_receiver, )); let _process_distribute_node_identity_handle = async_std::task::spawn(process_distribute_node_identity_handling_stream( client_thread_state.clone(), - node_identity_receiver, + node_identity_receiver_2, )); let _process_distribute_voters_handle = async_std::task::spawn( process_distribute_voters_handling_stream(client_thread_state.clone(), voters_receiver), ); let _process_leaf_stream_handle = async_std::task::spawn(process_leaf_stream( leaf_receiver, data_state.clone(), block_detail_sender, voters_sender, )); + let _process_node_identity_stream_handle = + async_std::task::spawn(process_node_identity_stream(
node_identity_receiver_1, + data_state.clone(), + node_identity_sender_2, + )); + + let _process_url_stream_handle = async_std::task::spawn(process_node_identity_url_stream( + url_receiver, + node_identity_sender_1, + )); + let _leaf_retriever_handle = async_std::task::spawn(async move { // Alright, let's get some leaves, bro let client = client; let block_height_result = client.get("status/block-height").send().await; let block_height: u64 = match block_height_result { Ok(block_height) => block_height, Err(err) => { tracing::info!("retrieve block height request failed: {}", err); return; } }; let latest_block_start = block_height.saturating_sub(50); let mut leaf_stream = match stream_leaves_from_hotshot_query_service( Some(latest_block_start), client, ) .await { Ok(leaf_stream) => leaf_stream, Err(err) => { tracing::info!("error getting leaf stream: {}", err); return; } }; let mut leaf_sender = leaf_sender; loop { let leaf_result = leaf_stream.next().await; let leaf = if let Some(Ok(leaf)) = leaf_result { leaf } else { tracing::info!("leaf stream closed"); break; }; let leaf_send_result = leaf_sender.send(leaf).await; if let Err(err) = leaf_send_result { tracing::info!("leaf sender closed: {}", err); break; } } }); + // send the original five node base urls + // This is assuming that demo-native is running, as such those Urls + // should be used / match + { + let urls = vec![ + "http://localhost:24000/", + "http://localhost:24001", + "http://localhost:24002", + "http://localhost:24003", + "http://localhost:24004", + ]; + + for url in urls { + let url = url.parse().unwrap(); + let send_result = url_sender.send(url).await; + if let Err(err) = send_result { + tracing::info!("url sender closed: {}", err); + break; + } + } + } + let _app_serve_result = app.serve("0.0.0.0:9000", STATIC_VER_0_1).await; } From 69aa14810f493a0df2b4104ed2d56d5b37abf0a4 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Tue, 16 Jul 2024 15:22:29 -0600 Subject: [PATCH 32/72] Add Node Identity information to Sequencer metrics reporting In order for the sequencers to have reportable data available to the Node Validator, they need to expose the identifying data to the node validator in a public way. In this case, we want to record the information in the status/metrics endpoint. However, we would also like this information to be configurable by the end user via environment variables. This change adds the labels to the various metrics areas, and updates the process-compose.yaml and docker-compose.yaml files to configure some values for the sequencers for local testing. --- docker-compose.yaml | 41 +++++++++++++++++++++++++++++++++++++++++ process-compose.yaml | 41 +++++++++++++++++++++++++++++++++++++++++ sequencer/src/lib.rs | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+) diff --git a/docker-compose.yaml b/docker-compose.yaml index 936db67f6..01aff2fa6 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -239,6 +239,14 @@ services: - RUST_LOG - RUST_LOG_FORMAT - ASYNC_STD_THREAD_COUNT + - IDENTITY_NODE_NAME=sequencer0 + - IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000000 + - IDENTITY_COMPANY_NAME=Espresso Systems + - IDENTITY_OPERATING_SYSTEM=Linux 5.15.153.1 + - IDENTITY_NETWORK_TYPE=local + - IDENTITY_COUNTRY_CODE=US + - IDENTITY_LATITUDE=40.7128 + - IDENTITY_LONGITUDE=-74.0060 depends_on: orchestrator: condition: service_healthy @@ -285,6 +293,14 @@ services: - RUST_LOG - RUST_LOG_FORMAT - ASYNC_STD_THREAD_COUNT + - IDENTITY_NODE_NAME=sequencer1 + - IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000001 + - IDENTITY_COMPANY_NAME=Espresso Systems + - IDENTITY_OPERATING_SYSTEM=Darwin 23.5.0 + - IDENTITY_NETWORK_TYPE=local + - IDENTITY_COUNTRY_CODE=GR + - IDENTITY_LATITUDE=39.0742 + - IDENTITY_LONGITUDE=21.8243 depends_on: orchestrator: condition: service_healthy @@ -327,6 +343,14 @@ services: - RUST_LOG - RUST_LOG_FORMAT - ASYNC_STD_THREAD_COUNT + - IDENTITY_NODE_NAME=sequencer2 + - IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000002 + - IDENTITY_COMPANY_NAME=Espresso Systems + - IDENTITY_OPERATING_SYSTEM=Darwin 23.5.0 + - IDENTITY_NETWORK_TYPE=local + - IDENTITY_COUNTRY_CODE=CN + - IDENTITY_LATITUDE=35.8617 + - IDENTITY_LONGITUDE=104.1954 depends_on: orchestrator: condition: service_healthy @@ -365,6 +389,15 @@ services: - RUST_LOG - RUST_LOG_FORMAT - ASYNC_STD_THREAD_COUNT + -
IDENTITY_NODE_NAME=sequencer3 + - IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000003 + - IDENTITY_COMPANY_NAME=Espresso Systems + - IDENTITY_OPERATING_SYSTEM=Microsoft Windows NT 10.0.22621.0 + - IDENTITY_NODE_TYPE=espresso-sequencer@0.1.0 + - IDENTITY_NETWORK_TYPE=local + - IDENTITY_COUNTRY_CODE=CN + - IDENTITY_LATITUDE=35.8617 + - IDENTITY_LONGITUDE=104.1954 depends_on: orchestrator: condition: service_healthy @@ -403,6 +436,14 @@ services: - RUST_LOG - RUST_LOG_FORMAT - ASYNC_STD_THREAD_COUNT + - IDENTITY_NODE_NAME=sequencer4 + - IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000004 + - IDENTITY_COMPANY_NAME=Espresso Systems + - IDENTITY_OPERATING_SYSTEM=TempleOS 5.03 + - IDENTITY_NETWORK_TYPE=local + - IDENTITY_COUNTRY_CODE=AU + - IDENTITY_LATITUDE=-25.2744 + - IDENTITY_LONGITUDE=133.7751 depends_on: orchestrator: condition: service_healthy diff --git a/process-compose.yaml b/process-compose.yaml index 67796668b..e84451d1e 100644 --- a/process-compose.yaml +++ b/process-compose.yaml @@ -117,6 +117,14 @@ processes: - ESPRESSO_SEQUENCER_PRIVATE_STAKING_KEY=$ESPRESSO_DEMO_SEQUENCER_STAKING_PRIVATE_KEY_0 - ESPRESSO_SEQUENCER_PRIVATE_STATE_KEY=$ESPRESSO_DEMO_SEQUENCER_STATE_PRIVATE_KEY_0 - ESPRESSO_SEQUENCER_IS_DA=true + - IDENTITY_NODE_NAME=sequencer0 + - IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000000 + - IDENTITY_COMPANY_NAME=Espresso Systems + - IDENTITY_OPERATING_SYSTEM=Linux 5.15.153.1 + - IDENTITY_NETWORK_TYPE=local + - IDENTITY_COUNTRY_CODE=US + - IDENTITY_LATITUDE=40.7128 + - IDENTITY_LONGITUDE=-74.0060 depends_on: orchestrator: condition: process_healthy @@ -159,6 +167,14 @@ processes: - ESPRESSO_SEQUENCER_PRIVATE_STAKING_KEY=$ESPRESSO_DEMO_SEQUENCER_STAKING_PRIVATE_KEY_1 - ESPRESSO_SEQUENCER_PRIVATE_STATE_KEY=$ESPRESSO_DEMO_SEQUENCER_STATE_PRIVATE_KEY_1 - ESPRESSO_SEQUENCER_IS_DA=true + - IDENTITY_NODE_NAME=sequencer1 + - IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000001 + - IDENTITY_COMPANY_NAME=Espresso Systems + - IDENTITY_OPERATING_SYSTEM=Darwin 23.5.0 + - IDENTITY_NETWORK_TYPE=local + - IDENTITY_COUNTRY_CODE=GR + - IDENTITY_LATITUDE=39.0742 + - IDENTITY_LONGITUDE=21.8243 depends_on: orchestrator: condition: process_healthy @@ -195,6 +211,14 @@ processes: - ESPRESSO_SEQUENCER_PRIVATE_STAKING_KEY=$ESPRESSO_DEMO_SEQUENCER_STAKING_PRIVATE_KEY_2 - ESPRESSO_SEQUENCER_PRIVATE_STATE_KEY=$ESPRESSO_DEMO_SEQUENCER_STATE_PRIVATE_KEY_2 - ESPRESSO_SEQUENCER_IS_DA=true + - IDENTITY_NODE_NAME=sequencer2 + - IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000002 + - IDENTITY_COMPANY_NAME=Espresso Systems + - IDENTITY_OPERATING_SYSTEM=Darwin 23.5.0 + - IDENTITY_NETWORK_TYPE=local + - IDENTITY_COUNTRY_CODE=CN + - IDENTITY_LATITUDE=35.8617 + - IDENTITY_LONGITUDE=104.1954 depends_on: orchestrator: condition: process_healthy @@ -228,6 +252,15 @@ processes: - ESPRESSO_SEQUENCER_STORAGE_PATH=$ESPRESSO_BASE_STORAGE_PATH/seq3 - ESPRESSO_SEQUENCER_PRIVATE_STAKING_KEY=$ESPRESSO_DEMO_SEQUENCER_STAKING_PRIVATE_KEY_3 - ESPRESSO_SEQUENCER_PRIVATE_STATE_KEY=$ESPRESSO_DEMO_SEQUENCER_STATE_PRIVATE_KEY_3 + - IDENTITY_NODE_NAME=sequencer3 + - IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000003 + - IDENTITY_COMPANY_NAME=Espresso Systems + - IDENTITY_OPERATING_SYSTEM=Microsoft Windows NT 10.0.22621.0 + - IDENTITY_NODE_TYPE=espresso-sequencer@0.1.0 + - IDENTITY_NETWORK_TYPE=local + - IDENTITY_COUNTRY_CODE=CN + - IDENTITY_LATITUDE=35.8617 + - IDENTITY_LONGITUDE=104.1954 depends_on: orchestrator: condition: process_healthy @@ -259,6 +292,14
@@ processes: - ESPRESSO_SEQUENCER_STORAGE_PATH=$ESPRESSO_BASE_STORAGE_PATH/seq4 - ESPRESSO_SEQUENCER_PRIVATE_STAKING_KEY=$ESPRESSO_DEMO_SEQUENCER_STAKING_PRIVATE_KEY_4 - ESPRESSO_SEQUENCER_PRIVATE_STATE_KEY=$ESPRESSO_DEMO_SEQUENCER_STATE_PRIVATE_KEY_4 + - IDENTITY_NODE_NAME=sequencer4 + - IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000004 + - IDENTITY_COMPANY_NAME=Espresso Systems + - IDENTITY_OPERATING_SYSTEM=TempleOS 5.03 + - IDENTITY_NETWORK_TYPE=local + - IDENTITY_COUNTRY_CODE=AU + - IDENTITY_LATITUDE=-25.2744 + - IDENTITY_LONGITUDE=133.7751 depends_on: orchestrator: condition: process_healthy diff --git a/sequencer/src/lib.rs b/sequencer/src/lib.rs index ea168ec4f..5f3a1c07b 100644 --- a/sequencer/src/lib.rs +++ b/sequencer/src/lib.rs @@ -143,6 +143,41 @@ pub async fn init_node( env!("VERGEN_GIT_COMMIT_TIMESTAMP").into(), ]); + // Expose Node Identity Information via the status/metrics API + metrics + .text_family( + "node_identity_general".into(), + vec![ + "name".into(), + "wallet".into(), + "company_name".into(), + "operating_system".into(), + "node_type".into(), + "network_type".into(), + ], + ) + .create(vec![ + std::env::var("IDENTITY_NODE_NAME").unwrap_or("".into()), + std::env::var("IDENTITY_WALLET_ADDRESS").unwrap_or("".into()), + std::env::var("IDENTITY_COMPANY_NAME").unwrap_or("".into()), + std::env::var("IDENTITY_OPERATING_SYSTEM").unwrap_or("".into()), + std::env::var("IDENTITY_NODE_TYPE") + .unwrap_or(format!("espresso-sequencer {}", Ver::VERSION)), + std::env::var("IDENTITY_NETWORK_TYPE").unwrap_or("".into()), + ]); + + // Expose Node Identity Location via the status/metrics API + metrics + .text_family( + "node_identity_location".into(), + vec!["country".into(), "latitude".into(), "longitude".into()], + ) + .create(vec![ + std::env::var("IDENTITY_COUNTRY_CODE").unwrap_or("".into()), + std::env::var("IDENTITY_LATITUDE").unwrap_or("".into()), + std::env::var("IDENTITY_LONGITUDE").unwrap_or("".into()), + ]); + // Stick our public key in `metrics` so it is easily accessible via the status API. let pub_key = BLSPubKey::from_private(&network_params.private_staking_key); metrics From 8daca23820f9a0da01866121f7798555629830e4 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Wed, 17 Jul 2024 09:18:04 -0600 Subject: [PATCH 33/72] Replace Sender with generic parameter In order to make the implementation of the code more flexible and to support different types of `Sink`s, the hard-coded use of `Sender` has been replaced with a generic parameter `K` instead. This allows us to swap out different types of `Sink`s and `Stream`s throughout the code base. --- node-metrics/src/api/node_validator/v0/mod.rs | 17 +- .../src/service/client_message/mod.rs | 17 +- node-metrics/src/service/client_state/mod.rs | 170 ++++++++++-------- node-metrics/src/service/data_state/mod.rs | 41 +++-- 4 files changed, 140 insertions(+), 105 deletions(-) diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs index c2b19ee49..c8da6dc44 100644 --- a/node-metrics/src/api/node_validator/v0/mod.rs +++ b/node-metrics/src/api/node_validator/v0/mod.rs @@ -163,8 +163,8 @@ impl From for DefineApiError { /// [StateClientMessageSender] allows for the retrieval of a [Sender] for sending /// messages received from the client to the Server for request processing.
-pub trait StateClientMessageSender { - fn sender(&self) -> Sender<InternalClientMessage>; +pub trait StateClientMessageSender<K> { + fn sender(&self) -> Sender<InternalClientMessage<K>>; } #[derive(Debug)] @@ -172,7 +172,7 @@ pub enum EndpointError {} pub fn define_api<State>() -> Result, DefineApiError> where - State: StateClientMessageSender + Send + Sync + 'static, + State: StateClientMessageSender<Sender<ServerMessage>> + Send + Sync + 'static, { let mut api = load_api::(include_str!("./node_validator.toml"))?; @@ -414,7 +414,7 @@ pub async fn get_node_identity_from_url( /// block height to begin streaming from. No matter what the value of /// [current_block_height] is the stream will always check what the latest /// block height is on the hotshot query service. It will then attempt to -/// pull as few Leafs as it needs from the stream. +/// pull as few Leaves as it needs from the stream. pub async fn stream_leaves_from_hotshot_query_service( current_block_height: Option<u64>, client: surf_disco::Client, @@ -743,6 +743,7 @@ mod tests { ClientThreadState, }, data_state::{process_leaf_stream, process_node_identity_stream, DataState}, + server_message::ServerMessage, }; use async_std::sync::RwLock; use futures::{ channel::mpsc::{self, Sender}, SinkExt, StreamExt, }; use std::{ io::{BufRead, BufReader}, sync::Arc, }; use tide_disco::App; - struct TestState(Sender<InternalClientMessage>); + struct TestState(Sender<InternalClientMessage<Sender<ServerMessage>>>); - impl StateClientMessageSender for TestState { - fn sender(&self) -> Sender<InternalClientMessage> { + impl StateClientMessageSender<Sender<ServerMessage>> for TestState { + fn sender(&self) -> Sender<InternalClientMessage<Sender<ServerMessage>>> { self.0.clone() } } @@ -792,7 +793,7 @@ Default::default(), ); - let client_thread_state = ClientThreadState::new( + let client_thread_state = ClientThreadState::<Sender<ServerMessage>>::new( Default::default(), Default::default(), Default::default(), diff --git a/node-metrics/src/service/client_message/mod.rs b/node-metrics/src/service/client_message/mod.rs index 2ca97a79f..ad8e720af 100644 --- a/node-metrics/src/service/client_message/mod.rs +++ b/node-metrics/src/service/client_message/mod.rs @@ -1,6 +1,4 @@ use super::client_id::ClientId; -use super::server_message::ServerMessage; -use futures::channel::mpsc::Sender; use serde::{Deserialize, Serialize}; /// [ClientMessage] represents the messages that the client can send to the @@ -23,8 +21,8 @@ pub enum ClientMessage { /// in order for the server to send back responses that correspond to the /// request. #[derive(Debug)] -pub enum InternalClientMessage { - Connected(Sender<ServerMessage>), +pub enum InternalClientMessage<K> { + Connected(K), Disconnected(ClientId), SubscribeLatestBlock(ClientId), @@ -37,7 +35,7 @@ pub enum InternalClientMessage { RequestVotersSnapshot(ClientId), } -impl PartialEq for InternalClientMessage { +impl<K> PartialEq for InternalClientMessage<K> { fn eq(&self, other: &Self) -> bool { match (self, other) { // We don't care about the [Sender] here, as it is unable to be @@ -63,7 +61,7 @@ impl PartialEq for InternalClientMessage { impl ClientMessage { /// [to_internal_with_client_id] converts the [ClientMessage] into an /// [InternalClientMessage] with the given [ClientId].
- pub fn to_internal_with_client_id(&self, client_id: ClientId) -> InternalClientMessage { + pub fn to_internal_with_client_id<K>(&self, client_id: ClientId) -> InternalClientMessage<K> { match self { ClientMessage::SubscribeLatestBlock => { InternalClientMessage::SubscribeLatestBlock(client_id) @@ -92,6 +90,8 @@ mod tests { use super::InternalClientMessage; use super::*; + use crate::service::server_message::ServerMessage; + use futures::channel::mpsc::Sender; use std::iter::zip; #[test] @@ -170,7 +170,8 @@ for message in messages { for i in 0..10 { let client_id = ClientId::from_count(i); - let internal_client_message = message.to_internal_with_client_id(client_id); + let internal_client_message = + message.to_internal_with_client_id::<Sender<ServerMessage>>(client_id); match internal_client_message { InternalClientMessage::SubscribeLatestBlock(id) => { assert_eq!(id, client_id); @@ -198,7 +199,7 @@ #[test] fn test_internal_client_message_partial_eq() { - let (sender, _) = futures::channel::mpsc::channel(1); + let (sender, _) = futures::channel::mpsc::channel::<ServerMessage>(1); let messages = [ InternalClientMessage::Connected(sender), InternalClientMessage::Disconnected(ClientId::from_count(1)), diff --git a/node-metrics/src/service/client_state/mod.rs b/node-metrics/src/service/client_state/mod.rs index 8004e58dd..25d21403b 100644 --- a/node-metrics/src/service/client_state/mod.rs +++ b/node-metrics/src/service/client_state/mod.rs @@ -6,10 +6,7 @@ use super::{ }; use async_std::sync::{RwLock, RwLockWriteGuard}; use bitvec::vec::BitVec; -use futures::{ - channel::mpsc::{SendError, Sender}, - SinkExt, Stream, StreamExt, -}; +use futures::{channel::mpsc::SendError, Sink, SinkExt, Stream, StreamExt}; use hotshot_query_service::explorer::{BlockDetail, ExplorerHistograms}; use sequencer::SeqTypes; use std::{ @@ -21,14 +18,14 @@ use std::{ /// It maintains and represents the connected clients, and their subscriptions. // This state is meant to be managed in a separate thread that assists with // processing and updating of individual client states. -pub struct ClientState { +pub struct ClientState<K> { client_id: ClientId, - sender: Sender<ServerMessage>, + sender: K, } -impl ClientState { +impl<K> ClientState<K> { /// Create a new ClientState with the given client_id and sender. - pub fn new(client_id: ClientId, sender: Sender<ServerMessage>) -> Self { + pub fn new(client_id: ClientId, sender: K) -> Self { Self { client_id, sender } } @@ -36,7 +33,7 @@ impl ClientState { self.client_id } - pub fn sender(&self) -> &Sender<ServerMessage> { + pub fn sender(&self) -> &K { &self.sender } } /// [ClientThreadState] represents the state of all of the active client /// connections connected to the service. This state governs which clients /// are connected, and what subscriptions they have setup.
-pub struct ClientThreadState { - clients: HashMap<ClientId, ClientState>, +pub struct ClientThreadState<K> { + clients: HashMap<ClientId, ClientState<K>>, subscribed_latest_block: HashSet<ClientId>, subscribed_node_identity: HashSet<ClientId>, subscribed_voters: HashSet<ClientId>, connection_id_counter: ClientId, } -impl ClientThreadState { +impl<K> ClientThreadState<K> { pub fn new( - clients: HashMap<ClientId, ClientState>, + clients: HashMap<ClientId, ClientState<K>>, subscribed_latest_block: HashSet<ClientId>, subscribed_node_identity: HashSet<ClientId>, subscribed_voters: HashSet<ClientId>, @@ -72,10 +69,10 @@ impl ClientThreadState { /// [drop_client_client_thread_state_write_guard] is a utility function for /// cleaning up the [ClientThreadState] -fn drop_client_client_thread_state_write_guard( +fn drop_client_client_thread_state_write_guard<K>( client_id: &ClientId, - client_thread_state_write_guard: &mut RwLockWriteGuard<ClientThreadState>, -) -> Option<ClientState> { + client_thread_state_write_guard: &mut RwLockWriteGuard<ClientThreadState<K>>, +) -> Option<ClientState<K>> { let client = client_thread_state_write_guard.clients.remove(client_id); client_thread_state_write_guard .subscribed_latest_block .remove(client_id); @@ -89,10 +86,10 @@ /// [drop_client_no_lock_guard] is a utility function for cleaning up the [ClientThreadState] /// when a client is detected as disconnected. -async fn drop_client_no_lock_guard( +async fn drop_client_no_lock_guard<K>( client_id: &ClientId, - client_thread_state: Arc<RwLock<ClientThreadState>>, -) -> Option<ClientState> { + client_thread_state: Arc<RwLock<ClientThreadState<K>>>, +) -> Option<ClientState<K>> { let mut client_thread_state_write_lock_guard = client_thread_state.write().await; drop_client_client_thread_state_write_guard( client_id, &mut client_thread_state_write_lock_guard, ) } @@ -128,10 +125,13 @@ impl std::error::Error for HandleConnectedError { /// [handle_client_message_connected] is a function that processes the client /// message to connect a client to the service. -pub async fn handle_client_message_connected( - mut sender: Sender<ServerMessage>, - client_thread_state: Arc<RwLock<ClientThreadState>>, -) -> Result<ClientId, HandleConnectedError> { +pub async fn handle_client_message_connected<K>( + mut sender: K, + client_thread_state: Arc<RwLock<ClientThreadState<K>>>, +) -> Result<ClientId, HandleConnectedError> +where + K: Sink<ServerMessage, Error = SendError> + Clone + Unpin, +{ let mut client_thread_state_write_lock_guard = client_thread_state.write().await; client_thread_state_write_lock_guard.connection_id_counter += 1; @@ -160,9 +160,9 @@ /// [handle_client_message_disconnected] is a function that processes the client /// message to disconnect a client from the service. -pub async fn handle_client_message_disconnected( +pub async fn handle_client_message_disconnected<K>( client_id: ClientId, - client_thread_state: Arc<RwLock<ClientThreadState>>, + client_thread_state: Arc<RwLock<ClientThreadState<K>>>, ) { // We might receive an implicit disconnect when attempting to // send a message, as the receiving channel might be closed. @@ -171,9 +171,9 @@ /// [handle_client_message_subscribe_latest_block] is a function that processes /// the client message to subscribe to the latest block stream. -pub async fn handle_client_message_subscribe_latest_block( +pub async fn handle_client_message_subscribe_latest_block<K>( client_id: ClientId, - client_thread_state: Arc<RwLock<ClientThreadState>>, + client_thread_state: Arc<RwLock<ClientThreadState<K>>>, ) { let mut client_thread_state_write_lock_guard = client_thread_state.write().await; @@ -187,9 +187,9 @@ /// [handle_client_message_subscribe_node_identity] is a function that processes /// the client message to subscribe to the node identity stream.
-pub async fn handle_client_message_subscribe_node_identity( +pub async fn handle_client_message_subscribe_node_identity<K>( client_id: ClientId, - client_thread_state: Arc<RwLock<ClientThreadState>>, + client_thread_state: Arc<RwLock<ClientThreadState<K>>>, ) { let mut client_thread_state_write_lock_guard = client_thread_state.write().await; @@ -203,9 +203,9 @@ /// [handle_client_message_subscribe_voters] is a function that processes /// the client message to subscribe to the voters bitvecs. -pub async fn handle_client_message_subscribe_voters( +pub async fn handle_client_message_subscribe_voters<K>( client_id: ClientId, - client_thread_state: Arc<RwLock<ClientThreadState>>, + client_thread_state: Arc<RwLock<ClientThreadState<K>>>, ) { let mut client_thread_state_write_lock_guard = client_thread_state.write().await; @@ -248,11 +248,14 @@ impl std::error::Error for HandleRequestBlocksSnapshotsError { /// [handle_client_message_request_blocks_snapshot] is a function that processes /// the client message request for a blocks snapshot. -pub async fn handle_client_message_request_blocks_snapshot( +pub async fn handle_client_message_request_blocks_snapshot<K>( client_id: ClientId, data_state: Arc<RwLock<DataState>>, - client_thread_state: Arc<RwLock<ClientThreadState>>, -) -> Result<(), HandleRequestBlocksSnapshotsError> { + client_thread_state: Arc<RwLock<ClientThreadState<K>>>, +) -> Result<(), HandleRequestBlocksSnapshotsError> +where + K: Sink<ServerMessage, Error = SendError> + Clone + Unpin, +{ let (client_thread_state_read_lock_guard, data_state_read_lock_guard) = futures::join!(client_thread_state.read(), data_state.read()); @@ -316,11 +319,14 @@ impl std::error::Error for HandleRequestNodeIdentitySnapshotError { /// [handle_client_message_request_node_identity_snapshot] is a function that /// processes the client message request for a node identity snapshot. -pub async fn handle_client_message_request_node_identity_snapshot( +pub async fn handle_client_message_request_node_identity_snapshot<K>( client_id: ClientId, data_state: Arc<RwLock<DataState>>, - client_thread_state: Arc<RwLock<ClientThreadState>>, -) -> Result<(), HandleRequestNodeIdentitySnapshotError> { + client_thread_state: Arc<RwLock<ClientThreadState<K>>>, +) -> Result<(), HandleRequestNodeIdentitySnapshotError> +where + K: Sink<ServerMessage, Error = SendError> + Clone + Unpin, +{ // Let's send the current Node Identity Snapshot to the client let (client_thread_state_read_lock_guard, data_state_read_lock_guard) = futures::join!(client_thread_state.read(), data_state.read()); @@ -381,11 +387,14 @@ impl std::error::Error for HandleRequestHistogramSnapshotError { /// [handle_client_message_request_histogram_snapshot] is a function that /// processes the client message request for a histogram snapshot. -pub async fn handle_client_message_request_histogram_snapshot( +pub async fn handle_client_message_request_histogram_snapshot<K>( client_id: ClientId, data_state: Arc<RwLock<DataState>>, - client_thread_state: Arc<RwLock<ClientThreadState>>, -) -> Result<(), HandleRequestHistogramSnapshotError> { + client_thread_state: Arc<RwLock<ClientThreadState<K>>>, -) -> Result<(), HandleRequestHistogramSnapshotError> +where + K: Sink<ServerMessage, Error = SendError> + Clone + Unpin, +{ // Let's send the current histogram data snapshot to the client let (client_thread_state_read_lock_guard, data_state_read_lock_guard) = futures::join!(client_thread_state.read(), data_state.read()); @@ -465,11 +474,14 @@ impl std::error::Error for HandleRequestVotersSnapshotError { /// [handle_client_message_request_voters_snapshot] is a function that processes /// the client message request for a voters snapshot.
-pub async fn handle_client_message_request_voters_snapshot( +pub async fn handle_client_message_request_voters_snapshot<K>( client_id: ClientId, data_state: Arc<RwLock<DataState>>, - client_thread_state: Arc<RwLock<ClientThreadState>>, -) -> Result<(), HandleRequestVotersSnapshotError> { + client_thread_state: Arc<RwLock<ClientThreadState<K>>>, +) -> Result<(), HandleRequestVotersSnapshotError> +where + K: Sink<ServerMessage, Error = SendError> + Clone + Unpin, +{ let (client_thread_state_read_lock_guard, data_state_read_lock_guard) = futures::join!(client_thread_state.read(), data_state.read()); @@ -589,11 +601,14 @@ impl std::error::Error for ProcessClientMessageError { /// The [ClientThreadState] is provided as it needs to be updated with new /// subscriptions / new connections depending on the incoming /// [InternalClientMessage] -pub async fn process_client_message( - message: InternalClientMessage, +pub async fn process_client_message<K>( + message: InternalClientMessage<K>, data_state: Arc<RwLock<DataState>>, - client_thread_state: Arc<RwLock<ClientThreadState>>, -) -> Result<(), ProcessClientMessageError> { + client_thread_state: Arc<RwLock<ClientThreadState<K>>>, +) -> Result<(), ProcessClientMessageError> +where + K: Sink<ServerMessage, Error = SendError> + Clone + Unpin, +{ match message { InternalClientMessage::Connected(sender) => { handle_client_message_connected(sender, client_thread_state).await?; @@ -679,8 +694,8 @@ pub fn clone_block_detail(input: &BlockDetail<SeqTypes>) -> BlockDetail<SeqTypes> -async fn drop_failed_client_sends( - client_thread_state: Arc<RwLock<ClientThreadState>>, +async fn drop_failed_client_sends<K>( + client_thread_state: Arc<RwLock<ClientThreadState<K>>>, failed_client_sends: Vec<ClientId>, ) { // Let's acquire our write lock @@ -699,10 +714,12 @@ async fn drop_failed_client_sends( /// [handle_received_block_detail] is a function that processes received Block /// details and will attempt to distribute the message to all of the clients /// that are subscribed to the latest block stream. -async fn handle_received_block_detail( - client_thread_state: Arc<RwLock<ClientThreadState>>, +async fn handle_received_block_detail<K>( + client_thread_state: Arc<RwLock<ClientThreadState<K>>>, block_detail: BlockDetail<SeqTypes>, -) { +) where + K: Sink<ServerMessage, Error = SendError> + Clone + Unpin, +{ let client_thread_state_read_lock_guard = client_thread_state.read().await; // These are the clients who are subscribed to the latest blocks, that @@ -757,10 +774,12 @@ /// [handle_received_node_identity] is a function that processes received /// NodeIdentity and will attempt to distribute the message to all of the /// clients that are subscribed to the node identity stream. -async fn handle_received_node_identity( - client_thread_state: Arc<RwLock<ClientThreadState>>, +async fn handle_received_node_identity<K>( + client_thread_state: Arc<RwLock<ClientThreadState<K>>>, node_identity: NodeIdentity, -) { +) where + K: Sink<ServerMessage, Error = SendError> + Clone + Unpin, +{ let client_thread_state_read_lock_guard = client_thread_state.read().await; // These are the clients who are subscribed to the node identities, that @@ -815,10 +834,12 @@ /// [handle_received_voters] is a function that processes received voters and /// will attempt to distribute the message to all of the clients that are /// subscribed to the voters stream. -async fn handle_received_voters( - client_thread_state: Arc<RwLock<ClientThreadState>>, +async fn handle_received_voters<K>( + client_thread_state: Arc<RwLock<ClientThreadState<K>>>, voters: BitVec<u16>, -) { +) where + K: Sink<ServerMessage, Error = SendError> + Clone + Unpin, +{ let client_thread_state_read_lock_guard = client_thread_state.read().await; // These are the clients who are subscribed to the voters, that @@ -870,12 +891,13 @@ /// [process_internal_client_message_stream] is a function that processes the /// client handling stream. This stream is responsible for managing the state /// of the connected clients, and their subscriptions.
-pub async fn process_internal_client_message_stream( +pub async fn process_internal_client_message_stream( mut stream: S, data_state: Arc>, - client_thread_state: Arc>, + client_thread_state: Arc>>, ) where - S: Stream + Unpin, + S: Stream> + Unpin, + K: Sink + Clone + Unpin, { loop { let message_result = stream.next().await; @@ -901,11 +923,12 @@ pub async fn process_internal_client_message_stream( /// [process_distribute_block_detail_handling_stream] is a function that /// processes the the [Stream] of incoming [BlockDetail] and distributes them /// to all subscribed clients. -pub async fn process_distribute_block_detail_handling_stream( - client_thread_state: Arc>, +pub async fn process_distribute_block_detail_handling_stream( + client_thread_state: Arc>>, mut stream: S, ) where S: Stream> + Unpin, + K: Sink + Clone + Unpin, { loop { let block_detail_result = stream.next().await; @@ -924,11 +947,12 @@ pub async fn process_distribute_block_detail_handling_stream( /// [process_distribute_node_identity_handling_stream] is a function that /// processes the the [Stream] of incoming [NodeIdentity] and distributes them /// to all subscribed clients. -pub async fn process_distribute_node_identity_handling_stream( - client_thread_state: Arc>, +pub async fn process_distribute_node_identity_handling_stream( + client_thread_state: Arc>>, mut stream: S, ) where S: Stream + Unpin, + K: Sink + Clone + Unpin, { loop { let node_identity_result = stream.next().await; @@ -947,11 +971,12 @@ pub async fn process_distribute_node_identity_handling_stream( /// [process_distribute_voters_handling_stream] is a function that processes /// the the [Stream] of incoming [BitVec] and distributes them to all /// subscribed clients. -pub async fn process_distribute_voters_handling_stream( - client_thread_state: Arc>, +pub async fn process_distribute_voters_handling_stream( + client_thread_state: Arc>>, mut stream: S, ) where S: Stream> + Unpin, + K: Sink + Clone + Unpin, { loop { let voters_result = stream.next().await; @@ -986,12 +1011,15 @@ pub mod tests { }; use async_std::{prelude::FutureExt, sync::RwLock}; use bitvec::vec::BitVec; - use futures::{channel::mpsc, SinkExt, StreamExt}; + use futures::{ + channel::mpsc::{self, Sender}, + SinkExt, StreamExt, + }; use hotshot_types::{signature_key::BLSPubKey, traits::signature_key::SignatureKey}; use sequencer::{Leaf, NodeState, ValidatedState}; use std::{sync::Arc, time::Duration}; - pub fn create_test_client_thread_state() -> ClientThreadState { + pub fn create_test_client_thread_state() -> ClientThreadState> { ClientThreadState { clients: Default::default(), subscribed_latest_block: Default::default(), @@ -1797,7 +1825,7 @@ pub mod tests { } } - // The following tests codify assumptions being bad on behalf of the Sender + // The following tests codify assumptions being bad on behalf of the Sink // and Receivers provided by the async_std library. The purpose of these // tests are to document these assumptions, and add a test to ensure that // they behave as expected. If they ever do not behave as expected, then @@ -1837,7 +1865,7 @@ pub mod tests { } /// Tests the behavior of the sender and receiver when the receiver is - /// dropped before anything is sent across the Sender. + /// dropped before anything is sent across the Sink. 
/// /// This is a separate library test to ensure that the behavior that this /// library is built on top of does not introduce a change that would diff --git a/node-metrics/src/service/data_state/mod.rs b/node-metrics/src/service/data_state/mod.rs index 0173e707b..737277a51 100644 --- a/node-metrics/src/service/data_state/mod.rs +++ b/node-metrics/src/service/data_state/mod.rs @@ -4,10 +4,7 @@ pub mod node_identity; use async_std::sync::RwLock; use bitvec::vec::BitVec; use circular_buffer::CircularBuffer; -use futures::{ - channel::mpsc::{SendError, Sender}, - SinkExt, Stream, StreamExt, -}; +use futures::{channel::mpsc::SendError, Sink, SinkExt, Stream, StreamExt}; use hotshot_query_service::{ availability::QueryableHeader, explorer::{BlockDetail, ExplorerHeader, Timestamp}, @@ -193,17 +190,19 @@ impl std::error::Error for ProcessLeafError { /// [process_incoming_leaf] is a helper function that will process an incoming /// [Leaf] and update the [DataState] with the new information. /// Additionally, the block that is contained within the [Leaf] will be -/// computed into a [BlockDetail] and sent to the [Sender] so that it can be +/// computed into a [BlockDetail] and sent to the [Sink] so that it can be /// processed for real-time considerations. -async fn process_incoming_leaf( +async fn process_incoming_leaf( leaf: Leaf, data_state: Arc>, - mut block_sender: Sender>, - mut voters_sender: Sender>, + mut block_sender: BDSink, + mut voters_sender: BVSink, ) -> Result<(), ProcessLeafError> where Header: BlockHeader + QueryableHeader + ExplorerHeader, Payload: BlockPayload, + BDSink: Sink, Error = SendError> + Unpin, + BVSink: Sink, Error = SendError> + Unpin, { let block_detail = create_block_detail_from_leaf(&leaf); let block_detail_copy = create_block_detail_from_leaf(&leaf); @@ -292,15 +291,17 @@ where /// [process_leaf_stream] allows for the consumption of a [Stream] when /// attempting to process new incoming [Leaf]s. -pub async fn process_leaf_stream( +pub async fn process_leaf_stream( mut stream: S, data_state: Arc>, - block_sender: Sender>, - voters_senders: Sender>, + block_sender: BDSink, + voters_senders: BVSink, ) where S: Stream> + Unpin, Header: BlockHeader + QueryableHeader + ExplorerHeader, Payload: BlockPayload, + BDSink: Sink, Error = SendError> + Clone + Unpin, + BVSink: Sink, Error = SendError> + Clone + Unpin, { loop { let leaf_result = stream.next().await; @@ -360,13 +361,16 @@ impl From for ProcessNodeIdentityError { /// [process_incoming_node_identity] is a helper function that will process an /// incoming [NodeIdentity] and update the [DataState] with the new information. -/// Additionally, the [NodeIdentity] will be sent to the [Sender] so that it can +/// Additionally, the [NodeIdentity] will be sent to the [Sink] so that it can /// be processed for real-time considerations. -async fn process_incoming_node_identity( +async fn process_incoming_node_identity( node_identity: NodeIdentity, data_state: Arc>, - mut node_identity_sender: Sender, -) -> Result<(), ProcessNodeIdentityError> { + mut node_identity_sender: NISink, +) -> Result<(), ProcessNodeIdentityError> +where + NISink: Sink + Unpin, +{ let mut data_state_write_lock_guard = data_state.write().await; data_state_write_lock_guard.add_node_identity(node_identity.clone()); node_identity_sender.send(node_identity).await?; @@ -378,14 +382,15 @@ async fn process_incoming_node_identity( /// attempting to process new incoming [NodeIdentity]s. 
/// This function will process the incoming [NodeIdentity] and update the
/// [DataState] with the new information.
-/// Additionally, the [NodeIdentity] will be sent to the [Sender] so that it can
+/// Additionally, the [NodeIdentity] will be sent to the [Sink] so that it can
 /// be processed for real-time considerations.
-pub async fn process_node_identity_stream(
+pub async fn process_node_identity_stream(
     mut stream: S,
     data_state: Arc>,
-    node_identity_sender: Sender,
+    node_identity_sender: NISink,
 ) where
     S: Stream + Unpin,
+    NISink: Sink + Clone + Unpin,
 {
     loop {
         let node_identity_result = stream.next().await;

From cc2619238927b439411e1f535c241730ce356ef4 Mon Sep 17 00:00:00 2001
From: Theodore Schnepper
Date: Fri, 19 Jul 2024 08:43:20 -0600
Subject: [PATCH 34/72] Fix inconsistent BLSPubKey decoding from str

There are two ways that the BLSPubKey is being decoded from the string
that yield inconsistent results. The first is `BLSPubKey::from_str`,
which seems to be the correct way to decode these values. The second was
to decode the `str` into a `TaggedBase64` and then to take the contained
data and pass it to `BLSPubKey::from_bytes`, which matches in the
majority of cases but seems to differ for some reason.
---
 node-metrics/src/api/node_validator/v0/mod.rs | 34 +++++++------------
 1 file changed, 13 insertions(+), 21 deletions(-)

diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs
index c8da6dc44..3dba47333 100644
--- a/node-metrics/src/api/node_validator/v0/mod.rs
+++ b/node-metrics/src/api/node_validator/v0/mod.rs
@@ -578,15 +578,14 @@ pub fn populate_node_identity_from_scrape(node_identity: &mut NodeIdentity, scra
     let public_key_string = node_sample.labels.get("key");
 
-    let public_key_from_scrape = if let Some(public_key_string) = public_key_string {
-        if let Ok(public_key) = BLSPubKey::from_str(public_key_string) {
-            public_key
-        } else {
-            // We were unable to parse the public key from the scrape result.
-            tracing::warn!(
-                "parsing public key failed, preventing us from verifying the public key"
-            );
-            return;
+    let public_key_from_scrape: BLSPubKey = if let Some(public_key_string) = public_key_string {
+        match BLSPubKey::from_str(public_key_string) {
+            Ok(public_key) => public_key,
+            Err(err) => {
+                // We couldn't parse the public key, so we can't create a NodeIdentity.
+                tracing::info!("parsing public key failed: {}", err);
+                return;
+            }
+        }
     } else {
         // We were unable to find the public key in the scrape result.
@@ -594,7 +593,10 @@ pub fn populate_node_identity_from_scrape(node_identity: &mut NodeIdentity, scra
         return;
     };
 
-    if &public_key_from_scrape != node_identity.public_key() {
+    let public_key_from_scrape_string = public_key_from_scrape.to_string();
+    let node_identity_public_key_string = node_identity.public_key().to_string();
+
+    if public_key_from_scrape_string != node_identity_public_key_string {
         tracing::warn!("node identity public key doesn't match public key in scrape, are we hitting the wrong URL, or is it behind a load balancer between multiple nodes?");
         return;
     }
@@ -661,19 +663,9 @@ pub fn node_identity_from_scrape(scrape: Scrape) -> Option {
     let public_key_string = node_sample.labels.get("key")?;
 
-    // create the Tagged Base 64 Public Key representation
-    let tagged_base64 =
-        if let Ok(tagged_base64) = tagged_base64::TaggedBase64::parse(public_key_string) {
-            tagged_base64
-        } else {
-            return None;
-        };
-
-    // Now we can take those bytes and we can create a Public Key from them.
-    let public_key = match BLSPubKey::from_bytes(tagged_base64.value().as_ref()) {
+    let public_key = match BLSPubKey::from_str(public_key_string) {
         Ok(public_key) => public_key,
         Err(err) => {
-            // We couldn't parse the public key, so we can't create a NodeIdentity.
             tracing::info!("parsing public key failed: {}", err);
             return None;
         }

From 8dbb5250b0edd8cece8fcca0683f11a684d05743 Mon Sep 17 00:00:00 2001
From: Theodore Schnepper
Date: Fri, 19 Jul 2024 08:46:26 -0600
Subject: [PATCH 35/72] Fix incomplete urls in skipped test

---
 node-metrics/src/api/node_validator/v0/mod.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs
index 3dba47333..dbd0f7dc1 100644
--- a/node-metrics/src/api/node_validator/v0/mod.rs
+++ b/node-metrics/src/api/node_validator/v0/mod.rs
@@ -887,10 +887,10 @@ mod tests {
         {
             let urls = vec![
                 "http://localhost:24000/",
-                "http://localhost:24001",
-                "http://localhost:24002",
-                "http://localhost:24003",
-                "http://localhost:24004",
+                "http://localhost:24001/",
+                "http://localhost:24002/",
+                "http://localhost:24003/",
+                "http://localhost:24004/",
             ];
 
             for url in urls {

From 177e5bc124a0bafdeb048d1065fef93de8f342d6 Mon Sep 17 00:00:00 2001
From: Theodore Schnepper
Date: Fri, 19 Jul 2024 08:46:49 -0600
Subject: [PATCH 36/72] Remove unused module from use statements

---
 node-metrics/src/api/node_validator/v0/mod.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs
index dbd0f7dc1..9bd80365a 100644
--- a/node-metrics/src/api/node_validator/v0/mod.rs
+++ b/node-metrics/src/api/node_validator/v0/mod.rs
@@ -10,7 +10,6 @@ use futures::{
 use hotshot_stake_table::vec_based::StakeTable;
 use hotshot_types::light_client::{CircuitField, StateVerKey};
 use hotshot_types::signature_key::BLSPubKey;
-use hotshot_types::traits::signature_key::SignatureKey;
 use hotshot_types::traits::{signature_key::StakeTableEntryType, stake_table::StakeTableScheme};
 use hotshot_types::PeerConfig;
 use prometheus_parse::{Sample, Scrape};

From 39eefefb647ae197360e1fc3972146a9c75dd562 Mon Sep 17 00:00:00 2001
From: Theodore Schnepper
Date: Fri, 19 Jul 2024 08:47:31 -0600
Subject: [PATCH 37/72] Fix performance issue in `create_block_detail_from_leaf`

There's a performance issue in `create_block_detail_from_leaf` concerning
creating a `BlockDetail` object from the `Leaf` and its `Payload`. The
primary cause of this performance issue involves attempting to determine
the number of transactions and the byte size of the `Payload`. For both
of these, the data was retrieved from the trait `QueryablePayload`.

`BlockPayload::num_transactions` ultimately calls a helper method called
`Queryable::transactions`. `Queryable::transactions` is implemented by
`sequencer::block::full_payload::payload::Payload` by calling
`QueryablePayload::enumerate`. The implementation of
`QueryablePayload::enumerate` ultimately invokes
`QueryablePayload::transaction`, which is implemented by calling
`QueryablePayload::transaction_with_proof`. This means that these
convenience functions ultimately involve calculating a proof which does
not get used.

To fix this, we rely on `QueryablePayload::iter` instead, as it is able
to determine the number of transactions, and using it we can also take
advantage of `Payload::transaction` to retrieve the transaction from the
iterated `Index` objects.
From there we can get the `size` of each `Transaction` contained within
the `Payload` without needing to invoke proof considerations.
---
 node-metrics/src/service/data_state/mod.rs | 27 +++++++++++++++++-----
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/node-metrics/src/service/data_state/mod.rs b/node-metrics/src/service/data_state/mod.rs
index 737277a51..c1bbeab63 100644
--- a/node-metrics/src/service/data_state/mod.rs
+++ b/node-metrics/src/service/data_state/mod.rs
@@ -6,7 +6,7 @@
 use bitvec::vec::BitVec;
 use circular_buffer::CircularBuffer;
 use futures::{channel::mpsc::SendError, Sink, SinkExt, Stream, StreamExt};
 use hotshot_query_service::{
-    availability::QueryableHeader,
+    availability::{QueryableHeader, QueryablePayload},
     explorer::{BlockDetail, ExplorerHeader, Timestamp},
     Leaf, Resolvable,
 };
@@ -143,7 +143,24 @@ impl DataState {
 /// [BlockDetail] from the reference to [Leaf].
 pub fn create_block_detail_from_leaf(leaf: &Leaf) -> BlockDetail {
     let block_header = leaf.block_header();
-    let block_payload = leaf.block_payload().unwrap_or(Payload::empty().0);
+    let block_payload = &leaf.block_payload().unwrap_or(Payload::empty().0);
+
+    let transaction_iter = block_payload.iter(block_header.metadata());
+
+    // Calculate the number of transactions and the total payload size of the
+    // transactions contained within the Payload.
+    let (num_transactions, total_payload_size) = transaction_iter.fold(
+        (0u64, 0u64),
+        |(num_transactions, total_payload_size), tx_index| {
+            (
+                num_transactions + 1,
+                total_payload_size
+                    + block_payload
+                        .transaction(&tx_index)
+                        .map_or(0u64, |tx| tx.payload().len() as u64),
+            )
+        },
+    );
 
     BlockDetail:: {
         hash: block_header.commitment(),
@@ -153,12 +170,10 @@ pub fn create_block_detail_from_leaf(leaf: &Leaf) -> BlockDetail
Date: Tue, 23 Jul 2024 16:25:39 -0600
Subject: [PATCH 38/72] Fix types that were moved to espresso-types crate

---
 Cargo.lock                                    | 77 +++----------------
 node-metrics/Cargo.toml                       |  3 +-
 node-metrics/src/api/node_validator/v0/mod.rs | 20 ++---
 node-metrics/src/service/client_state/mod.rs  |  4 +-
 node-metrics/src/service/data_state/mod.rs    | 13 ++--
 .../src/service/data_state/node_identity.rs   |  2 +-
 .../src/service/server_message/mod.rs         |  2 +-
 7 files changed, 29 insertions(+), 92 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 233f630d0..2311b0b5b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1327,7 +1327,7 @@ dependencies = [
 "surf",
 "surf-disco",
 "tagged-base64",
- "tide-disco 0.8.0",
+ "tide-disco",
 "tracing",
 "url",
 "vbs",
@@ -4122,7 +4122,7 @@ dependencies = [
 "serde",
 "snafu 0.8.4",
 "tagged-base64",
- "tide-disco 0.8.0",
+ "tide-disco",
 "toml",
 "vbs",
]
@@ -4151,7 +4151,7 @@ dependencies = [
 "snafu 0.8.4",
 "surf-disco",
 "tagged-base64",
- "tide-disco 0.8.0",
+ "tide-disco",
 "tokio",
 "tracing",
 "vbs",
@@ -4200,7 +4200,7 @@ dependencies = [
 "serde",
 "snafu 0.8.4",
 "tagged-base64",
- "tide-disco 0.8.0",
+ "tide-disco",
 "toml",
 "tracing",
 "vbs",
@@ -4291,7 +4291,7 @@ dependencies = [
 "serde_json",
 "surf-disco",
 "thiserror",
- "tide-disco 0.8.0",
+ "tide-disco",
 "tokio",
 "toml",
 "tracing",
@@ -4342,7 +4342,7 @@ dependencies = [
 "surf-disco",
 "tagged-base64",
 "tempfile",
- "tide-disco 0.8.0",
+ "tide-disco",
 "time 0.3.36",
 "tokio",
 "tokio-postgres",
@@ -4419,7 +4419,7 @@ dependencies = [
 "snafu 0.8.4",
 "surf-disco",
 "tagged-base64",
- "tide-disco 0.8.0",
+ "tide-disco",
 "time 0.3.36",
 "toml",
 "tracing",
@@ -4515,7 +4515,7 @@ dependencies = [
 "sha3",
 "snafu 0.8.4",
 "tagged-base64",
- "tide-disco 0.8.0",
+ "tide-disco",
"tokio", "tracing", "url", @@ -6446,6 +6446,7 @@ dependencies = [ "async-std", "bitvec", "circular-buffer", + "espresso-types", "futures", "hotshot-query-service", "hotshot-stake-table", @@ -6458,7 +6459,7 @@ dependencies = [ "serde_json", "surf-disco", "tagged-base64", - "tide-disco 0.9.0", + "tide-disco", "time 0.3.36", "toml", "tracing", @@ -8500,7 +8501,7 @@ dependencies = [ "tagged-base64", "tempfile", "thiserror", - "tide-disco 0.8.0", + "tide-disco", "time 0.3.36", "tokio-postgres", "toml", @@ -9373,7 +9374,7 @@ dependencies = [ "reqwest 0.12.5", "serde", "serde_json", - "tide-disco 0.8.0", + "tide-disco", "tracing", "vbs", ] @@ -9723,60 +9724,6 @@ dependencies = [ "vbs", ] -[[package]] -name = "tide-disco" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01e81752cd71cc517973c6ff743919848d7c9890331f38c43abadfb49eede3a6" -dependencies = [ - "anyhow", - "async-h1", - "async-lock 3.4.0", - "async-std", - "async-trait", - "clap", - "config", - "derivative", - "derive_more", - "dirs", - "edit-distance", - "futures", - "futures-util", - "http 1.1.0", - "include_dir", - "itertools 0.12.1", - "lazy_static", - "libc", - "markdown", - "maud", - "parking_lot", - "pin-project", - "prometheus", - "reqwest 0.12.5", - "routefinder", - "semver 1.0.23", - "serde", - "serde_json", - "serde_with", - "shellexpand", - "signal-hook", - "signal-hook-async-std", - "snafu 0.8.3", - "strum", - "strum_macros", - "tagged-base64", - "tide", - "tide-websockets", - "toml", - "tracing", - "tracing-distributed", - "tracing-futures", - "tracing-log", - "tracing-subscriber 0.3.18", - "url", - "vbs", -] - [[package]] name = "tide-websockets" version = "0.4.0" diff --git a/node-metrics/Cargo.toml b/node-metrics/Cargo.toml index 1c46e578f..f2b058641 100644 --- a/node-metrics/Cargo.toml +++ b/node-metrics/Cargo.toml @@ -6,13 +6,14 @@ authors = { workspace = true } edition = { workspace = true } [features] -testing = ["sequencer/testing", "serde_json"] +testing = ["sequencer/testing", "serde_json", "espresso-types/testing"] [dependencies] async-compatibility-layer = { workspace = true } async-std = { workspace = true } bitvec = { workspace = true } circular-buffer = { workspace = true } +espresso-types = { path = "../types" } futures = { workspace = true } hotshot-query-service = { workspace = true } hotshot-stake-table = { workspace = true } diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs index 9bd80365a..35d6aecec 100644 --- a/node-metrics/src/api/node_validator/v0/mod.rs +++ b/node-metrics/src/api/node_validator/v0/mod.rs @@ -1,6 +1,7 @@ use crate::service::client_message::{ClientMessage, InternalClientMessage}; use crate::service::data_state::{LocationDetails, NodeIdentity}; use crate::service::server_message::ServerMessage; +use espresso_types::FeeAccount; use futures::future::Either; use futures::Sink; use futures::{ @@ -13,7 +14,6 @@ use hotshot_types::signature_key::BLSPubKey; use hotshot_types::traits::{signature_key::StakeTableEntryType, stake_table::StakeTableScheme}; use hotshot_types::PeerConfig; use prometheus_parse::{Sample, Scrape}; -use sequencer::state::FeeAccount; use serde::{Deserialize, Serialize}; use std::fmt; use std::io::BufRead; @@ -75,16 +75,6 @@ impl tide_disco::Error for Error { } } -impl surf_disco::Error for Error { - fn catch_all(status: surf_disco::StatusCode, msg: String) -> Self { - Self::UnhandledSurfDisco(status, msg) - } - - fn status(&self) -> surf_disco::StatusCode { - 
surf_disco::StatusCode::INTERNAL_SERVER_ERROR - } -} - #[derive(Debug)] pub enum LoadApiError { Toml(toml::de::Error), @@ -418,7 +408,7 @@ pub async fn stream_leaves_from_hotshot_query_service( current_block_height: Option, client: surf_disco::Client, ) -> Result< - impl futures::Stream> + Unpin, + impl futures::Stream> + Unpin, hotshot_query_service::Error, > { let block_height_result = client.get("status/block-height").send().await; @@ -442,7 +432,7 @@ pub async fn stream_leaves_from_hotshot_query_service( "availability/stream/leaves/{}", start_block_height )) - .subscribe::() + .subscribe::() .await; let leaves_stream = match leaves_stream_result { @@ -737,11 +727,11 @@ mod tests { server_message::ServerMessage, }; use async_std::sync::RwLock; + use espresso_types::FeeAccount; use futures::{ channel::mpsc::{self, Sender}, SinkExt, StreamExt, }; - use sequencer::state::FeeAccount; use std::{ io::{BufRead, BufReader}, str::FromStr, @@ -932,7 +922,7 @@ consensus_libp2p_num_connected_peers 4 consensus_libp2p_num_failed_messages 0 # HELP consensus_node node # TYPE consensus_node gauge -consensus_node{key=\"BLS_VER_KEY~bQszS-QKYvUij2g20VqS8asttGSb95NrTu2PUj0uMh1CBUxNy1FqyPDjZqB29M7ZbjWqj79QkEOWkpga84AmDYUeTuWmy-0P1AdKHD3ehc-dKvei78BDj5USwXPJiDUlCxvYs_9rWYhagaq-5_LXENr78xel17spftNd5MA1Mw5U\"} 1 +consensus_node{key=\"BLS_VER_KEY~bQszS-QKYvUij2g20VqS8asttGSb95NrTu2PUj0uMh1CBUxNy1FqyPDjZqB29M7ZbjWqj79QkEOWkpga84AmDUseTuWmy-0P1AdKHD3ehc-dKvei78BDj5USwXPJiDUlCxvYs_9rWYhagaq-5_LXENr78xel17spfAnd5MA1Mw5U\"} 1 # HELP consensus_node_identity_general node_identity_general # TYPE consensus_node_identity_general gauge consensus_node_identity_general{company_name=\"Espresso Systems\",name=\"sequencer0\",network_type=\"local\",node_type=\"espresso-sequencer 0.1\",operating_system=\"Linux 5.15.153.1\",wallet=\"0x0000000000000000000000000000000000000000\"} 1 diff --git a/node-metrics/src/service/client_state/mod.rs b/node-metrics/src/service/client_state/mod.rs index 25d21403b..4e13fa042 100644 --- a/node-metrics/src/service/client_state/mod.rs +++ b/node-metrics/src/service/client_state/mod.rs @@ -6,9 +6,9 @@ use super::{ }; use async_std::sync::{RwLock, RwLockWriteGuard}; use bitvec::vec::BitVec; +use espresso_types::SeqTypes; use futures::{channel::mpsc::SendError, Sink, SinkExt, Stream, StreamExt}; use hotshot_query_service::explorer::{BlockDetail, ExplorerHistograms}; -use sequencer::SeqTypes; use std::{ collections::{HashMap, HashSet}, sync::Arc, @@ -1011,12 +1011,12 @@ pub mod tests { }; use async_std::{prelude::FutureExt, sync::RwLock}; use bitvec::vec::BitVec; + use espresso_types::{Leaf, NodeState, ValidatedState}; use futures::{ channel::mpsc::{self, Sender}, SinkExt, StreamExt, }; use hotshot_types::{signature_key::BLSPubKey, traits::signature_key::SignatureKey}; - use sequencer::{Leaf, NodeState, ValidatedState}; use std::{sync::Arc, time::Duration}; pub fn create_test_client_thread_state() -> ClientThreadState> { diff --git a/node-metrics/src/service/data_state/mod.rs b/node-metrics/src/service/data_state/mod.rs index c1bbeab63..3596a3751 100644 --- a/node-metrics/src/service/data_state/mod.rs +++ b/node-metrics/src/service/data_state/mod.rs @@ -4,6 +4,7 @@ pub mod node_identity; use async_std::sync::RwLock; use bitvec::vec::BitVec; use circular_buffer::CircularBuffer; +use espresso_types::{Header, Payload, SeqTypes}; use futures::{channel::mpsc::SendError, Sink, SinkExt, Stream, StreamExt}; use hotshot_query_service::{ availability::{QueryableHeader, QueryablePayload}, @@ -22,7 +23,6 @@ use 
hotshot_types::{ }; pub use location_details::LocationDetails; pub use node_identity::NodeIdentity; -use sequencer::{Header, Payload, SeqTypes}; use std::{collections::HashSet, iter::zip, sync::Arc}; use time::OffsetDateTime; @@ -164,9 +164,9 @@ pub fn create_block_detail_from_leaf(leaf: &Leaf) -> BlockDetail { hash: block_header.commitment(), - height: block_header.height, + height: block_header.height(), time: Timestamp( - OffsetDateTime::from_unix_timestamp(block_header.timestamp as i64) + OffsetDateTime::from_unix_timestamp(block_header.timestamp() as i64) .unwrap_or(OffsetDateTime::UNIX_EPOCH), ), proposer_id: block_header.proposer_id(), @@ -441,12 +441,11 @@ mod tests { use super::{process_leaf_stream, DataState}; use crate::service::data_state::{process_node_identity_stream, LocationDetails, NodeIdentity}; use async_std::{prelude::FutureExt, sync::RwLock}; + use espresso_types::{ + BlockMerkleTree, ChainConfig, FeeAccount, FeeMerkleTree, Leaf, NodeState, ValidatedState, + }; use futures::{channel::mpsc, SinkExt, StreamExt}; use hotshot_types::{signature_key::BLSPubKey, traits::signature_key::SignatureKey}; - use sequencer::{ - state::{BlockMerkleTree, FeeAccount, FeeMerkleTree}, - ChainConfig, Leaf, NodeState, ValidatedState, - }; use std::{sync::Arc, time::Duration}; use url::Url; diff --git a/node-metrics/src/service/data_state/node_identity.rs b/node-metrics/src/service/data_state/node_identity.rs index 5edd11fa4..7578971d6 100644 --- a/node-metrics/src/service/data_state/node_identity.rs +++ b/node-metrics/src/service/data_state/node_identity.rs @@ -1,6 +1,6 @@ use super::LocationDetails; +use espresso_types::FeeAccount; use hotshot_types::signature_key::BLSPubKey; -use sequencer::state::FeeAccount; use serde::{Deserialize, Serialize}; use surf_disco::Url; diff --git a/node-metrics/src/service/server_message/mod.rs b/node-metrics/src/service/server_message/mod.rs index 5bc348c56..9cb7cd798 100644 --- a/node-metrics/src/service/server_message/mod.rs +++ b/node-metrics/src/service/server_message/mod.rs @@ -2,8 +2,8 @@ use std::sync::Arc; use super::{client_id::ClientId, data_state::NodeIdentity}; use bitvec::vec::BitVec; +use espresso_types::SeqTypes; use hotshot_query_service::explorer::{BlockDetail, ExplorerHistograms}; -use sequencer::SeqTypes; use serde::{Deserialize, Serialize}; /// [ServerMessage] represents the messages that the server can send to the From 69593d0d2ffaa96a0dbcb1ef7fb1f5fa95fbf602 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Tue, 23 Jul 2024 17:55:03 -0600 Subject: [PATCH 39/72] Add simple node_validator_api creation function Refactor modify test to use new function --- .../v0/create_node_validator_api.rs | 237 ++++++++++++++++++ node-metrics/src/api/node_validator/v0/mod.rs | 180 +------------ 2 files changed, 238 insertions(+), 179 deletions(-) create mode 100644 node-metrics/src/api/node_validator/v0/create_node_validator_api.rs diff --git a/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs b/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs new file mode 100644 index 000000000..9be7185b6 --- /dev/null +++ b/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs @@ -0,0 +1,237 @@ +use std::sync::Arc; + +use super::{ + get_stake_table_from_sequencer, process_node_identity_url_stream, + stream_leaves_from_hotshot_query_service, StateClientMessageSender, STATIC_VER_0_1, +}; +use crate::service::{ + client_id::ClientId, + client_message::InternalClientMessage, + client_state::{ + 
process_distribute_block_detail_handling_stream, + process_distribute_node_identity_handling_stream, + process_distribute_voters_handling_stream, process_internal_client_message_stream, + ClientThreadState, + }, + data_state::{process_leaf_stream, process_node_identity_stream, DataState}, + server_message::ServerMessage, +}; +use async_std::{stream::StreamExt, sync::RwLock, task::JoinHandle}; +use futures::{ + channel::mpsc::{self, Sender}, + SinkExt, +}; +use tide_disco::App; +use url::Url; + +pub struct NodeValidatorAPIState { + pub sender: Sender>>, +} + +impl StateClientMessageSender> for NodeValidatorAPIState { + fn sender(&self) -> Sender>> { + self.sender.clone() + } +} + +pub struct NodeValidatorAPI { + pub task_handles: Vec>, +} + +pub struct NodeValidatorConfig { + pub bind_address: String, + pub stake_table_url_base: Url, + pub initial_node_public_base_urls: Vec, +} + +pub async fn create_node_validator_api( + config: NodeValidatorConfig, +) -> (NodeValidatorAPI, JoinHandle<()>) { + let node_validator_api_result = super::define_api::(); + + let node_validator_api = match node_validator_api_result { + Ok(api) => api, + Err(e) => { + panic!("Error: {:?}", e); + } + }; + + let (server_message_sender, server_message_receiver) = mpsc::channel(32); + let mut app: App = + App::with_state(NodeValidatorAPIState { + sender: server_message_sender, + }); + let register_module_result = app.register_module("node-validator", node_validator_api); + + if let Err(e) = register_module_result { + panic!("Error: {:?}", e); + } + + let mut data_state = DataState::new( + Default::default(), + Default::default(), + Default::default(), + Default::default(), + ); + + let client_thread_state = ClientThreadState::>::new( + Default::default(), + Default::default(), + Default::default(), + Default::default(), + ClientId::from_count(1), + ); + + let client = surf_disco::Client::new( + // "https://query.cappuccino.testnet.espresso.network/v0" + config.stake_table_url_base, + ); + + let get_stake_table_result = get_stake_table_from_sequencer(client.clone()).await; + let stake_table = get_stake_table_result.unwrap(); + data_state.replace_stake_table(stake_table); + + let data_state = Arc::new(RwLock::new(data_state)); + let client_thread_state = Arc::new(RwLock::new(client_thread_state)); + let (block_detail_sender, block_detail_receiver) = mpsc::channel(32); + let (leaf_sender, leaf_receiver) = mpsc::channel(32); + let (node_identity_sender_1, node_identity_receiver_1) = mpsc::channel(32); + let (node_identity_sender_2, node_identity_receiver_2) = mpsc::channel(32); + let (voters_sender, voters_receiver) = mpsc::channel(32); + let (mut url_sender, url_receiver) = mpsc::channel(32); + + let process_internal_client_message_handle = + async_std::task::spawn(process_internal_client_message_stream( + server_message_receiver, + data_state.clone(), + client_thread_state.clone(), + )); + + let process_distribute_block_detail_handle = + async_std::task::spawn(process_distribute_block_detail_handling_stream( + client_thread_state.clone(), + block_detail_receiver, + )); + + let process_distribute_node_identity_handle = + async_std::task::spawn(process_distribute_node_identity_handling_stream( + client_thread_state.clone(), + node_identity_receiver_2, + )); + + let process_distribute_voters_handle = async_std::task::spawn( + process_distribute_voters_handling_stream(client_thread_state.clone(), voters_receiver), + ); + + let process_leaf_stream_handle = async_std::task::spawn(process_leaf_stream( + leaf_receiver, + 
data_state.clone(), + block_detail_sender, + voters_sender, + )); + + let process_node_identity_stream_handle = async_std::task::spawn(process_node_identity_stream( + node_identity_receiver_1, + data_state.clone(), + node_identity_sender_2, + )); + + let process_url_stream_handle = async_std::task::spawn(process_node_identity_url_stream( + url_receiver, + node_identity_sender_1, + )); + + let leaf_retriever_handle = async_std::task::spawn(async move { + // Alright, let's get some leaves, bro + + let client = client; + + let mut leaf_stream = stream_leaves_from_hotshot_query_service(None, client) + .await + .unwrap(); + + let mut leaf_sender = leaf_sender; + + loop { + let leaf_result = leaf_stream.next().await; + let leaf = if let Some(Ok(leaf)) = leaf_result { + leaf + } else { + tracing::info!("leaf stream closed"); + break; + }; + + let leaf_send_result = leaf_sender.send(leaf).await; + if let Err(err) = leaf_send_result { + tracing::info!("leaf sender closed: {}", err); + break; + } + } + }); + + // send the original three node base urls + // This is assuming that demo-native is running, as such those Urls + // should be used / match + { + let urls = config.initial_node_public_base_urls; + + for url in urls { + let send_result = url_sender.send(url).await; + if let Err(err) = send_result { + tracing::info!("url sender closed: {}", err); + break; + } + } + } + + let app_serve_handle = async_std::task::spawn(async move { + let app_serve_result = app.serve("0.0.0.0:9000", STATIC_VER_0_1).await; + tracing::info!("app serve result: {:?}", app_serve_result); + }); + + tracing::info!("listening on: {:?}", config.bind_address); + + ( + NodeValidatorAPI { + task_handles: vec![ + process_internal_client_message_handle, + process_distribute_block_detail_handle, + process_distribute_node_identity_handle, + process_distribute_voters_handle, + process_leaf_stream_handle, + process_node_identity_stream_handle, + process_url_stream_handle, + leaf_retriever_handle, + ], + }, + app_serve_handle, + ) +} + +mod test { + + #[async_std::test] + #[ignore] + async fn test_full_setup_example() { + let (node_validator_api, app_serve_handle) = + super::create_node_validator_api(super::NodeValidatorConfig { + bind_address: "0.0.0.0:9000".to_string(), + stake_table_url_base: "http://localhost:24000/v0".parse().unwrap(), + initial_node_public_base_urls: vec![ + "http://localhost:24000/".parse().unwrap(), + "http://localhost:24001/".parse().unwrap(), + "http://localhost:24002/".parse().unwrap(), + "http://localhost:24003/".parse().unwrap(), + "http://localhost:24004/".parse().unwrap(), + ], + }) + .await; + + // We would like to wait until being signaled + app_serve_handle.await; + + for handle in node_validator_api.task_handles { + handle.cancel().await; + } + } +} diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs index 35d6aecec..80ca925dc 100644 --- a/node-metrics/src/api/node_validator/v0/mod.rs +++ b/node-metrics/src/api/node_validator/v0/mod.rs @@ -1,3 +1,4 @@ +pub mod create_node_validator_api; use crate::service::client_message::{ClientMessage, InternalClientMessage}; use crate::service::data_state::{LocationDetails, NodeIdentity}; use crate::service::server_message::ServerMessage; @@ -710,190 +711,11 @@ pub async fn process_node_identity_url_stream( } #[cfg(test)] mod tests { - use super::{ - get_stake_table_from_sequencer, process_node_identity_url_stream, - stream_leaves_from_hotshot_query_service, Error, StateClientMessageSender, STATIC_VER_0_1, - 
}; - use crate::service::{ - client_id::ClientId, - client_message::InternalClientMessage, - client_state::{ - process_distribute_block_detail_handling_stream, - process_distribute_node_identity_handling_stream, - process_distribute_voters_handling_stream, process_internal_client_message_stream, - ClientThreadState, - }, - data_state::{process_leaf_stream, process_node_identity_stream, DataState}, - server_message::ServerMessage, - }; - use async_std::sync::RwLock; use espresso_types::FeeAccount; - use futures::{ - channel::mpsc::{self, Sender}, - SinkExt, StreamExt, - }; use std::{ io::{BufRead, BufReader}, str::FromStr, - sync::Arc, }; - use tide_disco::App; - - struct TestState(Sender>>); - - impl StateClientMessageSender> for TestState { - fn sender(&self) -> Sender>> { - self.0.clone() - } - } - - #[async_std::test] - #[ignore] - async fn test_full_setup_example() { - let node_validator_api_result = super::define_api::(); - - let node_validator_api = match node_validator_api_result { - Ok(api) => api, - Err(e) => { - panic!("Error: {:?}", e); - } - }; - - let (sender, receiver) = mpsc::channel(32); - let mut app: App = App::with_state(TestState(sender)); - let register_module_result = app.register_module("node-validator", node_validator_api); - - if let Err(e) = register_module_result { - panic!("Error: {:?}", e); - } - - let mut data_state = DataState::new( - Default::default(), - Default::default(), - Default::default(), - Default::default(), - ); - - let client_thread_state = ClientThreadState::>::new( - Default::default(), - Default::default(), - Default::default(), - Default::default(), - ClientId::from_count(1), - ); - - let client = surf_disco::Client::new( - // "https://query.cappuccino.testnet.espresso.network/v0" - "http://localhost:24000/v0".parse().unwrap(), - ); - - let get_stake_table_result = get_stake_table_from_sequencer(client.clone()).await; - let stake_table = get_stake_table_result.unwrap(); - data_state.replace_stake_table(stake_table); - - let data_state = Arc::new(RwLock::new(data_state)); - let client_thread_state = Arc::new(RwLock::new(client_thread_state)); - let (block_detail_sender, block_detail_receiver) = mpsc::channel(32); - let (leaf_sender, leaf_receiver) = mpsc::channel(32); - let (node_identity_sender_1, node_identity_receiver_1) = mpsc::channel(32); - let (node_identity_sender_2, node_identity_receiver_2) = mpsc::channel(32); - let (voters_sender, voters_receiver) = mpsc::channel(32); - let (mut url_sender, url_receiver) = mpsc::channel(32); - - let _process_internal_client_message_handle = - async_std::task::spawn(process_internal_client_message_stream( - receiver, - data_state.clone(), - client_thread_state.clone(), - )); - - let _process_distribute_block_detail_handle = - async_std::task::spawn(process_distribute_block_detail_handling_stream( - client_thread_state.clone(), - block_detail_receiver, - )); - - let _process_distribute_node_identity_handle = - async_std::task::spawn(process_distribute_node_identity_handling_stream( - client_thread_state.clone(), - node_identity_receiver_2, - )); - - let _process_distribute_voters_handle = async_std::task::spawn( - process_distribute_voters_handling_stream(client_thread_state.clone(), voters_receiver), - ); - - let _process_leaf_stream_handle = async_std::task::spawn(process_leaf_stream( - leaf_receiver, - data_state.clone(), - block_detail_sender, - voters_sender, - )); - - let _process_node_identity_stream_handle = - async_std::task::spawn(process_node_identity_stream( - node_identity_receiver_1, - 
data_state.clone(),
-                node_identity_sender_2,
-            ));
-
-        let _process_url_stream_handle = async_std::task::spawn(process_node_identity_url_stream(
-            url_receiver,
-            node_identity_sender_1,
-        ));
-
-        let _leaf_retriever_handle = async_std::task::spawn(async move {
-            // Alright, let's get some leaves, bro
-
-            let client = client;
-
-            let mut leaf_stream = stream_leaves_from_hotshot_query_service(None, client)
-                .await
-                .unwrap();
-
-            let mut leaf_sender = leaf_sender;
-
-            loop {
-                let leaf_result = leaf_stream.next().await;
-                let leaf = if let Some(Ok(leaf)) = leaf_result {
-                    leaf
-                } else {
-                    tracing::info!("leaf stream closed");
-                    break;
-                };
-
-                let leaf_send_result = leaf_sender.send(leaf).await;
-                if let Err(err) = leaf_send_result {
-                    tracing::info!("leaf sender closed: {}", err);
-                    break;
-                }
-            }
-        });
-
-        // send the original three node base urls
-        // This is assuming that demo-native is running, as such those Urls
-        // should be used / match
-        {
-            let urls = vec![
-                "http://localhost:24000/",
-                "http://localhost:24001/",
-                "http://localhost:24002/",
-                "http://localhost:24003/",
-                "http://localhost:24004/",
-            ];
-
-            for url in urls {
-                let url = url.parse().unwrap();
-                let send_result = url_sender.send(url).await;
-                if let Err(err) = send_result {
-                    tracing::info!("url sender closed: {}", err);
-                    break;
-                }
-            }
-        }
-
-        let _app_serve_result = app.serve("0.0.0.0:9000", STATIC_VER_0_1).await;
-    }
 
     fn example_prometheus_output() -> &'static str {

From ae30f18cca3b5ae98234739f9fd3b222cea6f739 Mon Sep 17 00:00:00 2001
From: Theodore Schnepper
Date: Wed, 24 Jul 2024 07:16:50 -0600
Subject: [PATCH 40/72] Rename Node Identity Environment variables

The Node Identity environment variables don't match the existing naming
scheme that we use for the other Sequencer environment variables; namely,
they don't start with the prefix "ESPRESSO_SEQUENCER". This disconnect
may imply that these variables are not used within the sequencer itself,
but this is actually the only place where these variables are used. As
such, this renames them to match the other variables.
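As a minimal sketch of the resulting convention (not part of this patch's
diff; `identity_env` is a hypothetical helper name), the shared prefix
could be pinned in one place so that future identity fields cannot drift
back to the old unprefixed names:

    // Hypothetical helper, shown only to illustrate the naming convention;
    // the diff below reads each variable with std::env::var directly.
    fn identity_env(suffix: &str) -> String {
        std::env::var(format!("ESPRESSO_SEQUENCER_IDENTITY_{}", suffix)).unwrap_or_default()
    }

    // e.g. identity_env("NODE_NAME") reads ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME,
    // matching the renamed variables in the diff below.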
--- docker-compose.yaml | 82 ++++++++++++++++++++++---------------------- process-compose.yaml | 82 ++++++++++++++++++++++---------------------- sequencer/src/lib.rs | 18 +++++----- 3 files changed, 91 insertions(+), 91 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 01aff2fa6..a88e5edeb 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -236,17 +236,17 @@ services: - ESPRESSO_SEQUENCER_LIBP2P_BIND_ADDRESS=0.0.0.0:$ESPRESSO_DEMO_SEQUENCER_LIBP2P_PORT_0 - ESPRESSO_SEQUENCER_LIBP2P_ADVERTISE_ADDRESS=sequencer0:$ESPRESSO_DEMO_SEQUENCER_LIBP2P_PORT_0 - ESPRESSO_SEQUENCER_IS_DA=true + - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer0 + - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000000 + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=Linux 5.15.153.1 + - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local + - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=US + - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=40.7128 + - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=-74.0060 - RUST_LOG - RUST_LOG_FORMAT - ASYNC_STD_THREAD_COUNT - - IDENTITY_NODE_NAME=sequencer0 - - IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000000 - - IDENTITY_COMPANY_NAME=Espresso Systems - - IDENTITY_OPERATING_SYSTEM=Linux 5.15.153.1 - - IDENTITY_NETWORK_TYPE=local - - IDENTITY_COUNTRY_CODE=US - - IDENTITY_LATITUDE=40.7128 - - IDENTITY_LONGITUDE=-74.0060 depends_on: orchestrator: condition: service_healthy @@ -290,17 +290,17 @@ services: - ESPRESSO_SEQUENCER_LIBP2P_BIND_ADDRESS=0.0.0.0:$ESPRESSO_DEMO_SEQUENCER_LIBP2P_PORT_1 - ESPRESSO_SEQUENCER_LIBP2P_ADVERTISE_ADDRESS=sequencer1:$ESPRESSO_DEMO_SEQUENCER_LIBP2P_PORT_1 - ESPRESSO_SEQUENCER_IS_DA=true + - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer1 + - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000001 + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=Darwin 23.5.0 + - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local + - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=GR + - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=39.0742 + - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=21.8243 - RUST_LOG - RUST_LOG_FORMAT - ASYNC_STD_THREAD_COUNT - - IDENTITY_NODE_NAME=sequencer1 - - IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000001 - - IDENTITY_COMPANY_NAME=Espresso Systems - - IDENTITY_OPERATING_SYSTEM=Darwin 23.5.0 - - IDENTITY_NETWORK_TYPE=local - - IDENTITY_COUNTRY_CODE=GR - - IDENTITY_LATITUDE=39.0742 - - IDENTITY_LONGITUDE=21.8243 depends_on: orchestrator: condition: service_healthy @@ -340,17 +340,17 @@ services: - ESPRESSO_SEQUENCER_LIBP2P_BIND_ADDRESS=0.0.0.0:$ESPRESSO_DEMO_SEQUENCER_LIBP2P_PORT_2 - ESPRESSO_SEQUENCER_LIBP2P_ADVERTISE_ADDRESS=sequencer2:$ESPRESSO_DEMO_SEQUENCER_LIBP2P_PORT_2 - ESPRESSO_SEQUENCER_IS_DA=true + - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer2 + - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000002 + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=Darwin 23.5.0 + - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local + - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=CN + - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=35.8617 + - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=104.1954 - RUST_LOG - RUST_LOG_FORMAT - ASYNC_STD_THREAD_COUNT - - IDENTITY_NODE_NAME=sequencer2 - - IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000002 - - 
IDENTITY_COMPANY_NAME=Espresso Systems - - IDENTITY_OPERATING_SYSTEM=Darwin 23.5.0 - - IDENTITY_NETWORK_TYPE=local - - IDENTITY_COUNTRY_CODE=CN - - IDENTITY_LATITUDE=35.8617 - - IDENTITY_LONGITUDE=104.1954 depends_on: orchestrator: condition: service_healthy @@ -386,18 +386,18 @@ services: - ESPRESSO_SEQUENCER_PRIVATE_STATE_KEY=$ESPRESSO_DEMO_SEQUENCER_STATE_PRIVATE_KEY_3 - ESPRESSO_SEQUENCER_LIBP2P_BIND_ADDRESS=0.0.0.0:$ESPRESSO_DEMO_SEQUENCER_LIBP2P_PORT_3 - ESPRESSO_SEQUENCER_LIBP2P_ADVERTISE_ADDRESS=sequencer3:$ESPRESSO_DEMO_SEQUENCER_LIBP2P_PORT_3 + - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer3 + - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000003 + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=Microsoft Windows NT 10.0.22621.0 + - ESPRESSO_SEQUENCER_IDENTITY_E=espresso-sequencer@0.1.0 + - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local + - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=CN + - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=35.8617 + - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=104.1954 - RUST_LOG - RUST_LOG_FORMAT - ASYNC_STD_THREAD_COUNT - - IDENTITY_NODE_NAME=sequencer3 - - IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000003 - - IDENTITY_COMPANY_NAME=Espresso Systems - - IDENTITY_OPERATING_SYSTEM=Microsoft Windows NT 10.0.22621.0 - - IDENTITY_E=espresso-sequencer@0.1.0 - - IDENTITY_NETWORK_TYPE=local - - IDENTITY_COUNTRY_CODE=CN - - IDENTITY_LATITUDE=35.8617 - - IDENTITY_LONGITUDE=104.1954 depends_on: orchestrator: condition: service_healthy @@ -433,17 +433,17 @@ services: - ESPRESSO_SEQUENCER_PRIVATE_STATE_KEY=$ESPRESSO_DEMO_SEQUENCER_STATE_PRIVATE_KEY_4 - ESPRESSO_SEQUENCER_LIBP2P_BIND_ADDRESS=0.0.0.0:$ESPRESSO_DEMO_SEQUENCER_LIBP2P_PORT_4 - ESPRESSO_SEQUENCER_LIBP2P_ADVERTISE_ADDRESS=sequencer4:$ESPRESSO_DEMO_SEQUENCER_LIBP2P_PORT_4 + - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer4 + - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000004 + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=TempleOS 5.03 + - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local + - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=AU + - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=-25.2744 + - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=133.7751 - RUST_LOG - RUST_LOG_FORMAT - ASYNC_STD_THREAD_COUNT - - IDENTITY_NODE_NAME=sequencer4 - - IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000004 - - IDENTITY_COMPANY_NAME=Espresso Systems - - IDENTITY_OPERATING_SYSTEM=TempleOS 5.03 - - IDENTITY_NETWORK_TYPE=local - - IDENTITY_COUNTRY_CODE=AU - - IDENTITY_LATITUDE=-25.2744 - - IDENTITY_LONGITUDE=133.7751 depends_on: orchestrator: condition: service_healthy diff --git a/process-compose.yaml b/process-compose.yaml index e84451d1e..9efd3fa0d 100644 --- a/process-compose.yaml +++ b/process-compose.yaml @@ -117,14 +117,14 @@ processes: - ESPRESSO_SEQUENCER_PRIVATE_STAKING_KEY=$ESPRESSO_DEMO_SEQUENCER_STAKING_PRIVATE_KEY_0 - ESPRESSO_SEQUENCER_PRIVATE_STATE_KEY=$ESPRESSO_DEMO_SEQUENCER_STATE_PRIVATE_KEY_0 - ESPRESSO_SEQUENCER_IS_DA=true - - IDENTITY_NODE_NAME=sequencer0 - - IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000000 - - IDENTITY_COMPANY_NAME=Espresso Systems - - IDENTITY_OPERATING_SYSTEM=Linux 5.15.153.1 - - IDENTITY_NETWORK_TYPE=local - - IDENTITY_COUNTRY_CODE=US - - IDENTITY_LATITUDE=40.7128 - - IDENTITY_LONGITUDE=-74.0060 + - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer0 + - 
ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000000 + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=Linux 5.15.153.1 + - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local + - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=US + - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=40.7128 + - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=-74.0060 depends_on: orchestrator: condition: process_healthy @@ -167,14 +167,14 @@ processes: - ESPRESSO_SEQUENCER_PRIVATE_STAKING_KEY=$ESPRESSO_DEMO_SEQUENCER_STAKING_PRIVATE_KEY_1 - ESPRESSO_SEQUENCER_PRIVATE_STATE_KEY=$ESPRESSO_DEMO_SEQUENCER_STATE_PRIVATE_KEY_1 - ESPRESSO_SEQUENCER_IS_DA=true - - IDENTITY_NODE_NAME=sequencer1 - - IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000001 - - IDENTITY_COMPANY_NAME=Espresso Systems - - IDENTITY_OPERATING_SYSTEM=Darwin 23.5.0 - - IDENTITY_NETWORK_TYPE=local - - IDENTITY_COUNTRY_CODE=GR - - IDENTITY_LATITUDE=39.0742 - - IDENTITY_LONGITUDE=21.8243 + - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer1 + - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000001 + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=Darwin 23.5.0 + - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local + - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=GR + - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=39.0742 + - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=21.8243 depends_on: orchestrator: condition: process_healthy @@ -211,14 +211,14 @@ processes: - ESPRESSO_SEQUENCER_PRIVATE_STAKING_KEY=$ESPRESSO_DEMO_SEQUENCER_STAKING_PRIVATE_KEY_2 - ESPRESSO_SEQUENCER_PRIVATE_STATE_KEY=$ESPRESSO_DEMO_SEQUENCER_STATE_PRIVATE_KEY_2 - ESPRESSO_SEQUENCER_IS_DA=true - - IDENTITY_NODE_NAME=sequencer2 - - IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000002 - - IDENTITY_COMPANY_NAME=Espresso Systems - - IDENTITY_OPERATING_SYSTEM=Darwin 23.5.0 - - IDENTITY_NETWORK_TYPE=local - - IDENTITY_COUNTRY_CODE=CN - - IDENTITY_LATITUDE=35.8617 - - IDENTITY_LONGITUDE=104.1954 + - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer2 + - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000002 + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=Darwin 23.5.0 + - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local + - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=CN + - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=35.8617 + - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=104.1954 depends_on: orchestrator: condition: process_healthy @@ -252,15 +252,15 @@ processes: - ESPRESSO_SEQUENCER_STORAGE_PATH=$ESPRESSO_BASE_STORAGE_PATH/seq3 - ESPRESSO_SEQUENCER_PRIVATE_STAKING_KEY=$ESPRESSO_DEMO_SEQUENCER_STAKING_PRIVATE_KEY_3 - ESPRESSO_SEQUENCER_PRIVATE_STATE_KEY=$ESPRESSO_DEMO_SEQUENCER_STATE_PRIVATE_KEY_3 - - IDENTITY_NODE_NAME=sequencer3 - - IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000003 - - IDENTITY_COMPANY_NAME=Espresso Systems - - IDENTITY_OPERATING_SYSTEM=Microsoft Windows NT 10.0.22621.0 - - IDENTITY_E=espresso-sequencer@0.1.0 - - IDENTITY_NETWORK_TYPE=local - - IDENTITY_COUNTRY_CODE=CN - - IDENTITY_LATITUDE=35.8617 - - IDENTITY_LONGITUDE=104.1954 + - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer3 + - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000003 + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=Microsoft Windows NT 10.0.22621.0 + - 
ESPRESSO_SEQUENCER_IDENTITY_E=espresso-sequencer@0.1.0
+      - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local
+      - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=CN
+      - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=35.8617
+      - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=104.1954
     depends_on:
       orchestrator:
         condition: process_healthy
@@ -292,14 +292,14 @@ processes:
       - ESPRESSO_SEQUENCER_STORAGE_PATH=$ESPRESSO_BASE_STORAGE_PATH/seq4
      - ESPRESSO_SEQUENCER_PRIVATE_STAKING_KEY=$ESPRESSO_DEMO_SEQUENCER_STAKING_PRIVATE_KEY_4
      - ESPRESSO_SEQUENCER_PRIVATE_STATE_KEY=$ESPRESSO_DEMO_SEQUENCER_STATE_PRIVATE_KEY_4
-      - IDENTITY_NODE_NAME=sequencer4
-      - IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000004
-      - IDENTITY_COMPANY_NAME=Espresso Systems
-      - IDENTITY_OPERATING_SYSTEM=TempleOS 5.03
-      - IDENTITY_NETWORK_TYPE=local
-      - IDENTITY_COUNTRY_CODE=AU
-      - IDENTITY_LATITUDE=-25.2744
-      - IDENTITY_LONGITUDE=133.7751
+      - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer4
+      - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000004
+      - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems
+      - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=TempleOS 5.03
+      - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local
+      - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=AU
+      - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=-25.2744
+      - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=133.7751
     depends_on:
       orchestrator:
         condition: process_healthy
diff --git a/sequencer/src/lib.rs b/sequencer/src/lib.rs
index 5f3a1c07b..724f350c6 100644
--- a/sequencer/src/lib.rs
+++ b/sequencer/src/lib.rs
@@ -157,13 +157,13 @@ pub async fn init_node(
         ],
     )
     .create(vec![
-        std::env::var("IDENTITY_NODE_NAME").unwrap_or("".into()),
-        std::env::var("IDENTITY_WALLET_ADDRESS").unwrap_or("".into()),
-        std::env::var("IDENTITY_COMPANY_NAME").unwrap_or("".into()),
-        std::env::var("IDENTITY_OPERATING_SYSTEM").unwrap_or("".into()),
-        std::env::var("IDENTITY_NODE_TYPE")
+        std::env::var("ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME").unwrap_or("".into()),
+        std::env::var("ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS").unwrap_or("".into()),
+        std::env::var("ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME").unwrap_or("".into()),
+        std::env::var("ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM").unwrap_or("".into()),
+        std::env::var("ESPRESSO_SEQUENCER_IDENTITY_NODE_TYPE")
             .unwrap_or(format!("espresso-sequencer {}", Ver::VERSION)),
-        std::env::var("IDENTITY_NETWORK_TYPE").unwrap_or("".into()),
+        std::env::var("ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE").unwrap_or("".into()),
     ]);
 
     // Expose Node Identity Location via the status/metrics API
@@ -173,9 +173,9 @@ pub async fn init_node(
         vec!["country".into(), "latitude".into(), "longitude".into()],
     )
     .create(vec![
-        std::env::var("IDENTITY_COUNTRY_CODE").unwrap_or("".into()),
-        std::env::var("IDENTITY_LATITUDE").unwrap_or("".into()),
-        std::env::var("IDENTITY_LONGITUDE").unwrap_or("".into()),
+        std::env::var("ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE").unwrap_or("".into()),
+        std::env::var("ESPRESSO_SEQUENCER_IDENTITY_LATITUDE").unwrap_or("".into()),
+        std::env::var("ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE").unwrap_or("".into()),
     ]);
 
     // Stick our public key in `metrics` so it is easily accessible via the status API.

From 139a5592261451af9cfd47462679d0c044ced0a6 Mon Sep 17 00:00:00 2001
From: Theodore Schnepper
Date: Wed, 24 Jul 2024 07:28:04 -0600
Subject: [PATCH 41/72] Add public API advertisement environment variables

In order for the sequencer to respond to `RollCall` messages, it needs to
be given a URL to advertise its own public API base path.
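As a rough sketch of how a node might read this advertised URL (the
function name is hypothetical; only the ESPRESSO_SEQUENCER_PUBLIC_API_URL
variable itself comes from this patch), assuming the `url` crate already
used elsewhere in this repository:

    use url::Url;

    // Sketch: read and validate the advertised public API base URL, if any.
    fn advertised_public_api_url() -> Option<Url> {
        std::env::var("ESPRESSO_SEQUENCER_PUBLIC_API_URL")
            .ok()
            .and_then(|raw| Url::parse(&raw).ok())
    }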
The process-compose and docker-compose files have been updated to provide
this public API base URL via their environment variable configurations.
---
 docker-compose.yaml  | 5 +++++
 process-compose.yaml | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/docker-compose.yaml b/docker-compose.yaml
index a88e5edeb..2db50c97d 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -244,6 +244,7 @@ services:
       - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=US
      - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=40.7128
      - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=-74.0060
+      - ESPRESSO_SEQUENCER_PUBLIC_API_URL=http://sequencer0:$ESPRESSO_SEQUENCER_API_PORT/
      - RUST_LOG
      - RUST_LOG_FORMAT
      - ASYNC_STD_THREAD_COUNT
@@ -298,6 +299,7 @@ services:
       - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=GR
      - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=39.0742
      - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=21.8243
+      - ESPRESSO_SEQUENCER_PUBLIC_API_URL=http://sequencer1:$ESPRESSO_SEQUENCER_API_PORT/
      - RUST_LOG
      - RUST_LOG_FORMAT
      - ASYNC_STD_THREAD_COUNT
@@ -348,6 +350,7 @@ services:
       - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=CN
      - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=35.8617
      - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=104.1954
+      - ESPRESSO_SEQUENCER_PUBLIC_API_URL=http://sequencer2:$ESPRESSO_SEQUENCER_API_PORT/
      - RUST_LOG
      - RUST_LOG_FORMAT
      - ASYNC_STD_THREAD_COUNT
@@ -395,6 +398,7 @@ services:
       - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=CN
      - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=35.8617
      - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=104.1954
+      - ESPRESSO_SEQUENCER_PUBLIC_API_URL=http://sequencer3:$ESPRESSO_SEQUENCER_API_PORT/
      - RUST_LOG
      - RUST_LOG_FORMAT
      - ASYNC_STD_THREAD_COUNT
@@ -441,6 +445,7 @@ services:
       - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=AU
      - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=-25.2744
      - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=133.7751
+      - ESPRESSO_SEQUENCER_PUBLIC_API_URL=http://sequencer4:$ESPRESSO_SEQUENCER_API_PORT/
      - RUST_LOG
      - RUST_LOG_FORMAT
      - ASYNC_STD_THREAD_COUNT
diff --git a/process-compose.yaml b/process-compose.yaml
index 9efd3fa0d..be998eaac 100644
--- a/process-compose.yaml
+++ b/process-compose.yaml
@@ -125,6 +125,7 @@ processes:
       - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=US
      - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=40.7128
      - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=-74.0060
+      - ESPRESSO_SEQUENCER_PUBLIC_API_URL=http://localhost:$ESPRESSO_SEQUENCER_API_PORT/
     depends_on:
       orchestrator:
         condition: process_healthy
@@ -176,6 +176,7 @@ processes:
       - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=GR
      - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=39.0742
      - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=21.8243
+      - ESPRESSO_SEQUENCER_PUBLIC_API_URL=http://localhost:$ESPRESSO_SEQUENCER1_API_PORT/
     depends_on:
       orchestrator:
         condition: process_healthy
@@ -221,6 +221,7 @@ processes:
       - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=CN
      - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=35.8617
      - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=104.1954
+      - ESPRESSO_SEQUENCER_PUBLIC_API_URL=http://localhost:$ESPRESSO_SEQUENCER2_API_PORT/
     depends_on:
       orchestrator:
         condition: process_healthy
@@ -264,6 +264,7 @@ processes:
       - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=CN
      - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=35.8617
      - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=104.1954
+      - ESPRESSO_SEQUENCER_PUBLIC_API_URL=http://localhost:$ESPRESSO_SEQUENCER3_API_PORT/
     depends_on:
       orchestrator:
         condition: process_healthy
@@ -304,6 +304,7 @@ processes:
       - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=AU
      - ESPRESSO_SEQUENCER_IDENTITY_LATITUDE=-25.2744
      - ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE=133.7751
+      -
ESPRESSO_SEQUENCER_PUBLIC_API_URL=http://localhost:$ESPRESSO_SEQUENCER4_API_PORT/ depends_on: orchestrator: condition: process_healthy From 6a4be2e2ebae3c7fe7d80d61953fc8ba34af27cf Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Wed, 24 Jul 2024 08:01:15 -0600 Subject: [PATCH 42/72] Move sample prometheus output for test to its own file --- .../v0/example_prometheus_metrics_output.txt | 89 ++++++++++++++++++ node-metrics/src/api/node_validator/v0/mod.rs | 90 +------------------ 2 files changed, 90 insertions(+), 89 deletions(-) create mode 100644 node-metrics/src/api/node_validator/v0/example_prometheus_metrics_output.txt diff --git a/node-metrics/src/api/node_validator/v0/example_prometheus_metrics_output.txt b/node-metrics/src/api/node_validator/v0/example_prometheus_metrics_output.txt new file mode 100644 index 000000000..ca392ecf7 --- /dev/null +++ b/node-metrics/src/api/node_validator/v0/example_prometheus_metrics_output.txt @@ -0,0 +1,89 @@ +# HELP consensus_cdn_num_failed_messages num_failed_messages +# TYPE consensus_cdn_num_failed_messages counter +consensus_cdn_num_failed_messages 0 +# HELP consensus_current_view current_view +# TYPE consensus_current_view gauge +consensus_current_view 7 +# HELP consensus_invalid_qc invalid_qc +# TYPE consensus_invalid_qc gauge +consensus_invalid_qc 0 +# HELP consensus_last_decided_time last_decided_time +# TYPE consensus_last_decided_time gauge +consensus_last_decided_time 1720537017 +# HELP consensus_last_decided_view last_decided_view +# TYPE consensus_last_decided_view gauge +consensus_last_decided_view 4 +# HELP consensus_last_synced_block_height last_synced_block_height +# TYPE consensus_last_synced_block_height gauge +consensus_last_synced_block_height 4 +# HELP consensus_libp2p_num_connected_peers num_connected_peers +# TYPE consensus_libp2p_num_connected_peers gauge +consensus_libp2p_num_connected_peers 4 +# HELP consensus_libp2p_num_failed_messages num_failed_messages +# TYPE consensus_libp2p_num_failed_messages counter +consensus_libp2p_num_failed_messages 0 +# HELP consensus_node node +# TYPE consensus_node gauge +consensus_node{key="BLS_VER_KEY~bQszS-QKYvUij2g20VqS8asttGSb95NrTu2PUj0uMh1CBUxNy1FqyPDjZqB29M7ZbjWqj79QkEOWkpga84AmDYUeTuWmy-0P1AdKHD3ehc-dKvei78BDj5USwXPJiDUlCxvYs_9rWYhagaq-5_LXENr78xel17spftNd5MA1Mw5U"} 1 +# HELP consensus_node_identity_general node_identity_general +# TYPE consensus_node_identity_general gauge +consensus_node_identity_general{company_name="Espresso Systems",name="sequencer0",network_type="local",node_type="espresso-sequencer 0.1",operating_system="Linux 5.15.153.1",wallet="0x0000000000000000000000000000000000000000"} 1 +# HELP consensus_node_identity_location node_identity_location +# TYPE consensus_node_identity_location gauge +consensus_node_identity_location{country="US",latitude="-40.7128",longitude="-74.0060"} 1 +# HELP consensus_node_index node_index +# TYPE consensus_node_index gauge +consensus_node_index 4 +# HELP consensus_number_of_empty_blocks_proposed number_of_empty_blocks_proposed +# TYPE consensus_number_of_empty_blocks_proposed counter +consensus_number_of_empty_blocks_proposed 1 +# HELP consensus_number_of_timeouts number_of_timeouts +# TYPE consensus_number_of_timeouts counter +consensus_number_of_timeouts 0 +# HELP consensus_number_of_timeouts_as_leader number_of_timeouts_as_leader +# TYPE consensus_number_of_timeouts_as_leader counter +consensus_number_of_timeouts_as_leader 0 +# HELP consensus_number_of_views_per_decide_event number_of_views_per_decide_event +# TYPE 
consensus_number_of_views_per_decide_event histogram +consensus_number_of_views_per_decide_event_bucket{le="0.005"} 0 +consensus_number_of_views_per_decide_event_bucket{le="0.01"} 0 +consensus_number_of_views_per_decide_event_bucket{le="0.025"} 0 +consensus_number_of_views_per_decide_event_bucket{le="0.05"} 0 +consensus_number_of_views_per_decide_event_bucket{le="0.1"} 0 +consensus_number_of_views_per_decide_event_bucket{le="0.25"} 0 +consensus_number_of_views_per_decide_event_bucket{le="0.5"} 0 +consensus_number_of_views_per_decide_event_bucket{le="1"} 0 +consensus_number_of_views_per_decide_event_bucket{le="2.5"} 0 +consensus_number_of_views_per_decide_event_bucket{le="5"} 4 +consensus_number_of_views_per_decide_event_bucket{le="10"} 4 +consensus_number_of_views_per_decide_event_bucket{le="+Inf"} 4 +consensus_number_of_views_per_decide_event_sum 12 +consensus_number_of_views_per_decide_event_count 4 +# HELP consensus_number_of_views_since_last_decide number_of_views_since_last_decide +# TYPE consensus_number_of_views_since_last_decide gauge +consensus_number_of_views_since_last_decide 4 +# HELP consensus_outstanding_transactions outstanding_transactions +# TYPE consensus_outstanding_transactions gauge +consensus_outstanding_transactions 0 +# HELP consensus_outstanding_transactions_memory_size outstanding_transactions_memory_size +# TYPE consensus_outstanding_transactions_memory_size gauge +consensus_outstanding_transactions_memory_size 0 +# HELP consensus_version version +# TYPE consensus_version gauge +consensus_version{desc="20240701-15-gbd0957fd-dirty",rev="bd0957fddad19caab010dc59e5a92bc1c95cbc07",timestamp="1980-01-01T00:00:00.000000000Z"} 1 +# HELP consensus_view_duration_as_leader view_duration_as_leader +# TYPE consensus_view_duration_as_leader histogram +consensus_view_duration_as_leader_bucket{le="0.005"} 0 +consensus_view_duration_as_leader_bucket{le="0.01"} 0 +consensus_view_duration_as_leader_bucket{le="0.025"} 0 +consensus_view_duration_as_leader_bucket{le="0.05"} 0 +consensus_view_duration_as_leader_bucket{le="0.1"} 0 +consensus_view_duration_as_leader_bucket{le="0.25"} 0 +consensus_view_duration_as_leader_bucket{le="0.5"} 0 +consensus_view_duration_as_leader_bucket{le="1"} 0 +consensus_view_duration_as_leader_bucket{le="2.5"} 1 +consensus_view_duration_as_leader_bucket{le="5"} 1 +consensus_view_duration_as_leader_bucket{le="10"} 1 +consensus_view_duration_as_leader_bucket{le="+Inf"} 1 +consensus_view_duration_as_leader_sum 2 +consensus_view_duration_as_leader_count 1 diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs index 80ca925dc..756435978 100644 --- a/node-metrics/src/api/node_validator/v0/mod.rs +++ b/node-metrics/src/api/node_validator/v0/mod.rs @@ -718,95 +718,7 @@ mod tests { }; fn example_prometheus_output() -> &'static str { - "# HELP consensus_cdn_num_failed_messages num_failed_messages -# TYPE consensus_cdn_num_failed_messages counter -consensus_cdn_num_failed_messages 0 -# HELP consensus_current_view current_view -# TYPE consensus_current_view gauge -consensus_current_view 7 -# HELP consensus_invalid_qc invalid_qc -# TYPE consensus_invalid_qc gauge -consensus_invalid_qc 0 -# HELP consensus_last_decided_time last_decided_time -# TYPE consensus_last_decided_time gauge -consensus_last_decided_time 1720537017 -# HELP consensus_last_decided_view last_decided_view -# TYPE consensus_last_decided_view gauge -consensus_last_decided_view 4 -# HELP consensus_last_synced_block_height last_synced_block_height -# 
TYPE consensus_last_synced_block_height gauge -consensus_last_synced_block_height 4 -# HELP consensus_libp2p_num_connected_peers num_connected_peers -# TYPE consensus_libp2p_num_connected_peers gauge -consensus_libp2p_num_connected_peers 4 -# HELP consensus_libp2p_num_failed_messages num_failed_messages -# TYPE consensus_libp2p_num_failed_messages counter -consensus_libp2p_num_failed_messages 0 -# HELP consensus_node node -# TYPE consensus_node gauge -consensus_node{key=\"BLS_VER_KEY~bQszS-QKYvUij2g20VqS8asttGSb95NrTu2PUj0uMh1CBUxNy1FqyPDjZqB29M7ZbjWqj79QkEOWkpga84AmDUseTuWmy-0P1AdKHD3ehc-dKvei78BDj5USwXPJiDUlCxvYs_9rWYhagaq-5_LXENr78xel17spfAnd5MA1Mw5U\"} 1 -# HELP consensus_node_identity_general node_identity_general -# TYPE consensus_node_identity_general gauge -consensus_node_identity_general{company_name=\"Espresso Systems\",name=\"sequencer0\",network_type=\"local\",node_type=\"espresso-sequencer 0.1\",operating_system=\"Linux 5.15.153.1\",wallet=\"0x0000000000000000000000000000000000000000\"} 1 -# HELP consensus_node_identity_location node_identity_location -# TYPE consensus_node_identity_location gauge -consensus_node_identity_location{country=\"US\",latitude=\"-40.7128\",longitude=\"-74.0060\"} 1 -# HELP consensus_node_index node_index -# TYPE consensus_node_index gauge -consensus_node_index 4 -# HELP consensus_number_of_empty_blocks_proposed number_of_empty_blocks_proposed -# TYPE consensus_number_of_empty_blocks_proposed counter -consensus_number_of_empty_blocks_proposed 1 -# HELP consensus_number_of_timeouts number_of_timeouts -# TYPE consensus_number_of_timeouts counter -consensus_number_of_timeouts 0 -# HELP consensus_number_of_timeouts_as_leader number_of_timeouts_as_leader -# TYPE consensus_number_of_timeouts_as_leader counter -consensus_number_of_timeouts_as_leader 0 -# HELP consensus_number_of_views_per_decide_event number_of_views_per_decide_event -# TYPE consensus_number_of_views_per_decide_event histogram -consensus_number_of_views_per_decide_event_bucket{le=\"0.005\"} 0 -consensus_number_of_views_per_decide_event_bucket{le=\"0.01\"} 0 -consensus_number_of_views_per_decide_event_bucket{le=\"0.025\"} 0 -consensus_number_of_views_per_decide_event_bucket{le=\"0.05\"} 0 -consensus_number_of_views_per_decide_event_bucket{le=\"0.1\"} 0 -consensus_number_of_views_per_decide_event_bucket{le=\"0.25\"} 0 -consensus_number_of_views_per_decide_event_bucket{le=\"0.5\"} 0 -consensus_number_of_views_per_decide_event_bucket{le=\"1\"} 0 -consensus_number_of_views_per_decide_event_bucket{le=\"2.5\"} 0 -consensus_number_of_views_per_decide_event_bucket{le=\"5\"} 4 -consensus_number_of_views_per_decide_event_bucket{le=\"10\"} 4 -consensus_number_of_views_per_decide_event_bucket{le=\"+Inf\"} 4 -consensus_number_of_views_per_decide_event_sum 12 -consensus_number_of_views_per_decide_event_count 4 -# HELP consensus_number_of_views_since_last_decide number_of_views_since_last_decide -# TYPE consensus_number_of_views_since_last_decide gauge -consensus_number_of_views_since_last_decide 4 -# HELP consensus_outstanding_transactions outstanding_transactions -# TYPE consensus_outstanding_transactions gauge -consensus_outstanding_transactions 0 -# HELP consensus_outstanding_transactions_memory_size outstanding_transactions_memory_size -# TYPE consensus_outstanding_transactions_memory_size gauge -consensus_outstanding_transactions_memory_size 0 -# HELP consensus_version version -# TYPE consensus_version gauge 
-consensus_version{desc=\"20240701-15-gbd0957fd-dirty\",rev=\"bd0957fddad19caab010dc59e5a92bc1c95cbc07\",timestamp=\"1980-01-01T00:00:00.000000000Z\"} 1 -# HELP consensus_view_duration_as_leader view_duration_as_leader -# TYPE consensus_view_duration_as_leader histogram -consensus_view_duration_as_leader_bucket{le=\"0.005\"} 0 -consensus_view_duration_as_leader_bucket{le=\"0.01\"} 0 -consensus_view_duration_as_leader_bucket{le=\"0.025\"} 0 -consensus_view_duration_as_leader_bucket{le=\"0.05\"} 0 -consensus_view_duration_as_leader_bucket{le=\"0.1\"} 0 -consensus_view_duration_as_leader_bucket{le=\"0.25\"} 0 -consensus_view_duration_as_leader_bucket{le=\"0.5\"} 0 -consensus_view_duration_as_leader_bucket{le=\"1\"} 0 -consensus_view_duration_as_leader_bucket{le=\"2.5\"} 1 -consensus_view_duration_as_leader_bucket{le=\"5\"} 1 -consensus_view_duration_as_leader_bucket{le=\"10\"} 1 -consensus_view_duration_as_leader_bucket{le=\"+Inf\"} 1 -consensus_view_duration_as_leader_sum 2 -consensus_view_duration_as_leader_count 1" + include_str!("example_prometheus_metrics_output.txt") } #[test] From c77239c3039be4db978af6abcce426925f18b938 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Wed, 24 Jul 2024 08:24:22 -0600 Subject: [PATCH 43/72] Add test file to typos exclusion list --- .typos.toml | 1 + .../v0/create_node_validator_api.rs | 171 ++++++++++-------- 2 files changed, 100 insertions(+), 72 deletions(-) diff --git a/.typos.toml b/.typos.toml index 53cdcf235..37ee97ca2 100644 --- a/.typos.toml +++ b/.typos.toml @@ -4,4 +4,5 @@ extend-exclude = [ "doc/*.svg", "contracts/lib", "contract-bindings", + "node-metrics/src/api/node_validator/v0/example_prometheus_metrics_output.txt", ] diff --git a/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs b/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs index 9be7185b6..1efe92c5d 100644 --- a/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs +++ b/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use super::{ get_stake_table_from_sequencer, process_node_identity_url_stream, - stream_leaves_from_hotshot_query_service, StateClientMessageSender, STATIC_VER_0_1, + stream_leaves_from_hotshot_query_service, }; use crate::service::{ client_id::ClientId, @@ -18,22 +18,11 @@ use crate::service::{ }; use async_std::{stream::StreamExt, sync::RwLock, task::JoinHandle}; use futures::{ - channel::mpsc::{self, Sender}, + channel::mpsc::{self, Receiver, Sender}, SinkExt, }; -use tide_disco::App; use url::Url; -pub struct NodeValidatorAPIState { - pub sender: Sender>>, -} - -impl StateClientMessageSender> for NodeValidatorAPIState { - fn sender(&self) -> Sender>> { - self.sender.clone() - } -} - pub struct NodeValidatorAPI { pub task_handles: Vec>, } @@ -44,29 +33,22 @@ pub struct NodeValidatorConfig { pub initial_node_public_base_urls: Vec, } -pub async fn create_node_validator_api( - config: NodeValidatorConfig, -) -> (NodeValidatorAPI, JoinHandle<()>) { - let node_validator_api_result = super::define_api::(); - - let node_validator_api = match node_validator_api_result { - Ok(api) => api, - Err(e) => { - panic!("Error: {:?}", e); - } - }; - - let (server_message_sender, server_message_receiver) = mpsc::channel(32); - let mut app: App = - App::with_state(NodeValidatorAPIState { - sender: server_message_sender, - }); - let register_module_result = app.register_module("node-validator", node_validator_api); - - if let Err(e) = register_module_result { - 
panic!("Error: {:?}", e); - } +#[derive(Debug)] +pub enum CreateNodeValidatorProcessingError { + FailedToGetStakeTable(hotshot_query_service::Error), +} +/** + * create_node_validator_processing is a function that creates a node validator + * processing environment. This function will create a number of tasks that + * will be responsible for processing the data streams that are coming in from + * the various sources. This function will also create the data state that + * will be used to store the state of the network. + */ +pub async fn create_node_validator_processing( + config: NodeValidatorConfig, + server_message_receiver: Receiver>>, +) -> Result { let mut data_state = DataState::new( Default::default(), Default::default(), @@ -82,13 +64,12 @@ pub async fn create_node_validator_api( ClientId::from_count(1), ); - let client = surf_disco::Client::new( - // "https://query.cappuccino.testnet.espresso.network/v0" - config.stake_table_url_base, - ); + let client = surf_disco::Client::new(config.stake_table_url_base); + + let stake_table = get_stake_table_from_sequencer(client.clone()) + .await + .map_err(CreateNodeValidatorProcessingError::FailedToGetStakeTable)?; - let get_stake_table_result = get_stake_table_from_sequencer(client.clone()).await; - let stake_table = get_stake_table_result.unwrap(); data_state.replace_stake_table(stake_table); let data_state = Arc::new(RwLock::new(data_state)); @@ -142,13 +123,20 @@ pub async fn create_node_validator_api( )); let leaf_retriever_handle = async_std::task::spawn(async move { - // Alright, let's get some leaves, bro + // Alright, let's start processing leaves + // TODO: We should move this into its own function that can respond + // and react appropriately when a service or sequencer does down + // so that it can gracefully re-establish the stream as necessary. 
let client = client; - let mut leaf_stream = stream_leaves_from_hotshot_query_service(None, client) - .await - .unwrap(); + let mut leaf_stream = match stream_leaves_from_hotshot_query_service(None, client).await { + Ok(leaf_stream) => leaf_stream, + Err(err) => { + tracing::info!("error getting leaf stream: {}", err); + return; + } + }; let mut leaf_sender = leaf_sender; @@ -184,37 +172,62 @@ pub async fn create_node_validator_api( } } - let app_serve_handle = async_std::task::spawn(async move { - let app_serve_result = app.serve("0.0.0.0:9000", STATIC_VER_0_1).await; - tracing::info!("app serve result: {:?}", app_serve_result); - }); - - tracing::info!("listening on: {:?}", config.bind_address); - - ( - NodeValidatorAPI { - task_handles: vec![ - process_internal_client_message_handle, - process_distribute_block_detail_handle, - process_distribute_node_identity_handle, - process_distribute_voters_handle, - process_leaf_stream_handle, - process_node_identity_stream_handle, - process_url_stream_handle, - leaf_retriever_handle, - ], - }, - app_serve_handle, - ) + Ok(NodeValidatorAPI { + task_handles: vec![ + process_internal_client_message_handle, + process_distribute_block_detail_handle, + process_distribute_node_identity_handle, + process_distribute_voters_handle, + process_leaf_stream_handle, + process_node_identity_stream_handle, + process_url_stream_handle, + leaf_retriever_handle, + ], + }) } +#[cfg(test)] mod test { + use crate::{ + api::node_validator::v0::{StateClientMessageSender, STATIC_VER_0_1}, + service::{client_message::InternalClientMessage, server_message::ServerMessage}, + }; + use futures::channel::mpsc::{self, Sender}; + use tide_disco::App; + + struct TestState(Sender>>); + + impl StateClientMessageSender> for TestState { + fn sender(&self) -> Sender>> { + self.0.clone() + } + } #[async_std::test] #[ignore] async fn test_full_setup_example() { - let (node_validator_api, app_serve_handle) = - super::create_node_validator_api(super::NodeValidatorConfig { + let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(32); + let state = TestState(internal_client_message_sender); + // let state = Arc::new(state); + + let mut app: App<_, crate::api::node_validator::v0::Error> = App::with_state(state); + let node_validator_api_result = super::super::define_api::(); + let node_validator_api = match node_validator_api_result { + Ok(node_validator_api) => node_validator_api, + Err(err) => { + panic!("error defining node validator api: {:?}", err); + } + }; + + match app.register_module("node-validator", node_validator_api) { + Ok(_) => {} + Err(err) => { + panic!("error registering node validator api: {:?}", err); + } + } + + let node_validator_task_state = match super::create_node_validator_processing( + super::NodeValidatorConfig { bind_address: "0.0.0.0:9000".to_string(), stake_table_url_base: "http://localhost:24000/v0".parse().unwrap(), initial_node_public_base_urls: vec![ @@ -224,13 +237,27 @@ mod test { "http://localhost:24003/".parse().unwrap(), "http://localhost:24004/".parse().unwrap(), ], - }) - .await; + }, + internal_client_message_receiver, + ) + .await + { + Ok(node_validator_task_state) => node_validator_task_state, + + Err(err) => { + panic!("error defining node validator api: {:?}", err); + } + }; // We would like to wait until being signaled + let app_serve_handle = async_std::task::spawn(async move { + let app_serve_result = app.serve("0.0.0.0:9000", STATIC_VER_0_1).await; + tracing::info!("app serve result: {:?}", app_serve_result); + 
}); + app_serve_handle.await; - for handle in node_validator_api.task_handles { + for handle in node_validator_task_state.task_handles { handle.cancel().await; } } From d15df9097ad51269c35339fa464470ddd747f8fa Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Wed, 24 Jul 2024 11:14:29 -0600 Subject: [PATCH 44/72] Fix outdated `get_stake_table_from_sequencer` implementation The `get_stake_table_from_sequencer` implementation currently reflects a configuration that has the `known_nodes_with_stake` at the root level. This has since changed and `known_nodes_with_stake` is now at a deeper level, under another key called `config`. --- node-metrics/src/api/node_validator/v0/mod.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs index 756435978..54ada9105 100644 --- a/node-metrics/src/api/node_validator/v0/mod.rs +++ b/node-metrics/src/api/node_validator/v0/mod.rs @@ -292,6 +292,11 @@ pub struct PublishHotShotConfig { pub known_nodes_with_stake: Vec>, } +#[derive(Debug, Deserialize)] +pub struct SequencerConfig { + pub config: PublishHotShotConfig, +} + /// [get_stake_table_from_sequencer] retrieves the stake table from the /// Sequencer. It expects a [surf_disco::Client] to be provided so that it can /// make the request to the Hotshot Query Service. It will return a @@ -308,7 +313,7 @@ pub async fn get_stake_table_from_sequencer( .header("Accept", "application/json"); let stake_table_result = request.send().await; - let public_hot_shot_config: PublishHotShotConfig = match stake_table_result { + let sequencer_config: SequencerConfig = match stake_table_result { Ok(public_hot_shot_config) => public_hot_shot_config, Err(err) => { tracing::info!("retrieve stake table request failed: {}", err); @@ -316,6 +321,8 @@ pub async fn get_stake_table_from_sequencer( } }; + let public_hot_shot_config = sequencer_config.config; + let mut stake_table = StakeTable::::new( public_hot_shot_config.known_nodes_with_stake.len(), ); From 26d91753be2d85fa14a37d82320b131ebf15aa76 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Thu, 25 Jul 2024 07:21:04 -0600 Subject: [PATCH 45/72] Add company website to node identity information --- docker-compose.yaml | 5 +++++ .../v0/example_prometheus_metrics_output.txt | 2 +- node-metrics/src/api/node_validator/v0/mod.rs | 9 +++++++++ node-metrics/src/service/client_state/mod.rs | 3 +++ node-metrics/src/service/data_state/mod.rs | 1 + node-metrics/src/service/data_state/node_identity.rs | 9 +++++++++ process-compose.yaml | 5 +++++ sequencer/src/lib.rs | 2 ++ 8 files changed, 35 insertions(+), 1 deletion(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 2db50c97d..b997aa2d7 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -239,6 +239,7 @@ services: - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer0 - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000000 - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_WEBSITE=https://www.espressosys.com/ - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=Linux 5.15.153.1 - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=US @@ -294,6 +295,7 @@ services: - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer1 - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000001 - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - 
ESPRESSO_SEQUENCER_IDENTITY_COMPANY_WEBSITE=https://www.espressosys.com/ - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=Darwin 23.5.0 - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=GR @@ -345,6 +347,7 @@ services: - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer2 - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000002 - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_WEBSITE=https://www.espressosys.com/ - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=Darwin 23.5.0 - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=CN @@ -392,6 +395,7 @@ services: - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer3 - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000003 - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_WEBSITE=https://www.espressosys.com/ - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=Microsoft Windows NT 10.0.22621.0 - ESPRESSO_SEQUENCER_IDENTITY_E=espresso-sequencer@0.1.0 - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local @@ -440,6 +444,7 @@ services: - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer4 - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000004 - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_WEBSITE=https://www.espressosys.com/ - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=TempleOS 5.03 - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=AU diff --git a/node-metrics/src/api/node_validator/v0/example_prometheus_metrics_output.txt b/node-metrics/src/api/node_validator/v0/example_prometheus_metrics_output.txt index ca392ecf7..f49e38610 100644 --- a/node-metrics/src/api/node_validator/v0/example_prometheus_metrics_output.txt +++ b/node-metrics/src/api/node_validator/v0/example_prometheus_metrics_output.txt @@ -27,7 +27,7 @@ consensus_libp2p_num_failed_messages 0 consensus_node{key="BLS_VER_KEY~bQszS-QKYvUij2g20VqS8asttGSb95NrTu2PUj0uMh1CBUxNy1FqyPDjZqB29M7ZbjWqj79QkEOWkpga84AmDYUeTuWmy-0P1AdKHD3ehc-dKvei78BDj5USwXPJiDUlCxvYs_9rWYhagaq-5_LXENr78xel17spftNd5MA1Mw5U"} 1 # HELP consensus_node_identity_general node_identity_general # TYPE consensus_node_identity_general gauge -consensus_node_identity_general{company_name="Espresso Systems",name="sequencer0",network_type="local",node_type="espresso-sequencer 0.1",operating_system="Linux 5.15.153.1",wallet="0x0000000000000000000000000000000000000000"} 1 +consensus_node_identity_general{company_name="Espresso Systems",company_website="https://www.espressosys.com/",name="sequencer0",network_type="local",node_type="espresso-sequencer 0.1",operating_system="Linux 5.15.153.1",wallet="0x0000000000000000000000000000000000000000"} 1 # HELP consensus_node_identity_location node_identity_location # TYPE consensus_node_identity_location gauge consensus_node_identity_location{country="US",latitude="-40.7128",longitude="-74.0060"} 1 diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs index 54ada9105..5cac91004 100644 --- a/node-metrics/src/api/node_validator/v0/mod.rs +++ b/node-metrics/src/api/node_validator/v0/mod.rs @@ -469,6 +469,15 @@ fn populate_node_identity_general_from_scrape( .labels .get("company_name") .map(|s| s.into()); + let company_website = match node_identity_general_sample + .labels + .get("company_website") + 
.map(Url::parse) + { + Some(Ok(url)) => Some(url), + _ => None, + }; + node_identity.company_website = company_website; node_identity.network_type = node_identity_general_sample .labels .get("network_type") diff --git a/node-metrics/src/service/client_state/mod.rs b/node-metrics/src/service/client_state/mod.rs index 4e13fa042..44213cbb9 100644 --- a/node-metrics/src/service/client_state/mod.rs +++ b/node-metrics/src/service/client_state/mod.rs @@ -1038,6 +1038,7 @@ pub mod tests { Some(Default::default()), Some("http://localhost/".parse().unwrap()), Some("company".to_string()), + Some("https://example.com/".parse().unwrap()), Some(LocationDetails::new( Some((0.0, 0.0)), Some("US".to_string()), @@ -1056,6 +1057,7 @@ pub mod tests { Some(Default::default()), Some("http://localhost/".parse().unwrap()), Some("company".to_string()), + Some("https://example.com/".parse().unwrap()), Some(LocationDetails::new( Some((0.0, 0.0)), Some("US".to_string()), @@ -1074,6 +1076,7 @@ pub mod tests { Some(Default::default()), Some("http://localhost/".parse().unwrap()), Some("company".to_string()), + Some("https://example.com/".parse().unwrap()), Some(LocationDetails::new( Some((0.0, 0.0)), Some("US".to_string()), diff --git a/node-metrics/src/service/data_state/mod.rs b/node-metrics/src/service/data_state/mod.rs index 3596a3751..e05c06be4 100644 --- a/node-metrics/src/service/data_state/mod.rs +++ b/node-metrics/src/service/data_state/mod.rs @@ -606,6 +606,7 @@ mod tests { Some(FeeAccount::default()), Some(Url::parse("https://example.com/").unwrap()), Some("company".to_string()), + Some(Url::parse("https://example.com/").unwrap()), Some(LocationDetails::new( Some((40.7128, -74.0060)), Some("US".to_string()), diff --git a/node-metrics/src/service/data_state/node_identity.rs b/node-metrics/src/service/data_state/node_identity.rs index 7578971d6..5ba8d4c49 100644 --- a/node-metrics/src/service/data_state/node_identity.rs +++ b/node-metrics/src/service/data_state/node_identity.rs @@ -13,6 +13,7 @@ pub struct NodeIdentity { pub(crate) wallet_address: Option, pub(crate) public_url: Option, pub(crate) company: Option, + pub(crate) company_website: Option, pub(crate) location: Option, pub(crate) operating_system: Option, pub(crate) node_type: Option, @@ -27,6 +28,7 @@ impl NodeIdentity { wallet_address: Option, public_url: Option, company: Option, + company_website: Option, location: Option, operating_system: Option, node_type: Option, @@ -38,6 +40,7 @@ impl NodeIdentity { wallet_address, public_url, company, + company_website, location, operating_system, node_type, @@ -65,6 +68,10 @@ impl NodeIdentity { &self.company } + pub fn company_website(&self) -> &Option { + &self.company_website + } + pub fn location(&self) -> Option<&LocationDetails> { self.location.as_ref() } @@ -88,6 +95,7 @@ impl NodeIdentity { wallet_address: None, public_url: None, company: None, + company_website: None, location: None, operating_system: None, node_type: None, @@ -112,6 +120,7 @@ pub mod tests { Some(Default::default()), Some("https://espressosys.com/".parse().unwrap()), Some("company".to_string()), + Some("https://example.com/".parse().unwrap()), Some(LocationDetails::new( Some((0.0, 0.0)), Some("US".to_string()), diff --git a/process-compose.yaml b/process-compose.yaml index be998eaac..0357dec30 100644 --- a/process-compose.yaml +++ b/process-compose.yaml @@ -120,6 +120,7 @@ processes: - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer0 - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000000 - 
ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_WEBSITE=https://www.espressosys.com/ - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=Linux 5.15.153.1 - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=US @@ -171,6 +172,7 @@ processes: - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer1 - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000001 - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_WEBSITE=https://www.espressosys.com/ - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=Darwin 23.5.0 - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=GR @@ -216,6 +218,7 @@ processes: - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer2 - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000002 - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_WEBSITE=https://www.espressosys.com/ - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=Darwin 23.5.0 - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=CN @@ -258,6 +261,7 @@ processes: - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer3 - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000003 - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_WEBSITE=https://www.espressosys.com/ - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=Microsoft Windows NT 10.0.22621.0 - ESPRESSO_SEQUENCER_IDENTITY_E=espresso-sequencer@0.1.0 - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local @@ -299,6 +303,7 @@ processes: - ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME=sequencer4 - ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS=0x0000000000000000000000000000000000000004 - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME=Espresso Systems + - ESPRESSO_SEQUENCER_IDENTITY_COMPANY_WEBSITE=https://www.espressosys.com/ - ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM=TempleOS 5.03 - ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE=local - ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE=AU diff --git a/sequencer/src/lib.rs b/sequencer/src/lib.rs index 724f350c6..f8c69ba73 100644 --- a/sequencer/src/lib.rs +++ b/sequencer/src/lib.rs @@ -151,6 +151,7 @@ pub async fn init_node( "name".into(), "wallet".into(), "company_name".into(), + "company_website".into(), "operating_system".into(), "node_type".into(), "network_type".into(), @@ -160,6 +161,7 @@ pub async fn init_node( std::env::var("ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME").unwrap_or("".into()), std::env::var("ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS").unwrap_or("".into()), std::env::var("ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME").unwrap_or("".into()), + std::env::var("ESPRESSO_SEQUENCER_IDENTITY_COMPANY_WEBSITE").unwrap_or("".into()), std::env::var("ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM").unwrap_or("".into()), std::env::var("ESPRESSO_SEQUENCER_IDENTITY_NODE_TYPE") .unwrap_or(format!("espresso-sequencer {}", Ver::VERSION)), From 32108b1e94a88401e72fc44b7b65b785c82898d3 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Thu, 25 Jul 2024 07:21:46 -0600 Subject: [PATCH 46/72] Remove unused main.rs --- node-metrics/src/main.rs | 45 ---------------------------------------- 1 file changed, 45 deletions(-) delete mode 100644 node-metrics/src/main.rs diff --git a/node-metrics/src/main.rs b/node-metrics/src/main.rs deleted file mode 100644 index 7f1b5d3f3..000000000 --- 
a/node-metrics/src/main.rs +++ /dev/null @@ -1,45 +0,0 @@ -use vbs::version::{StaticVersion, Version}; - -/// CONSTANT for protocol major version -pub const VERSION_MAJ: u16 = 0; - -/// CONSTANT for protocol major version -pub const VERSION_MIN: u16 = 1; - -pub const VERSION_0_1: Version = Version { - major: VERSION_MAJ, - minor: VERSION_MIN, -}; - -/// Constant for the base protocol version in this instance of HotShot. -pub const BASE_VERSION: Version = VERSION_0_1; - -/// Type for protocol static version 0.1. -pub type Version01 = StaticVersion; - -/// This represents the latest version of this service. This will likely -/// always be whatever the max API version that's being served is. -pub const SERVICE_VER_0_1: Version01 = StaticVersion {}; - -/// The client definition for the Push CDN. Uses the Quic -/// protocol and no middleware. Differs from the user -/// definition in that is on the client-side. -#[derive(Clone)] -pub struct ClientDef; - -/// ClientConnectionMessage is a message that indicates when a client is -/// connecting or disconnecting from the service. This message is used -/// to signify when the client arrives or leaves. -pub enum ClientConnectionMessage { - Connected, - Disconnected, -} - -#[async_std::main] -async fn main() { - // We have two separate states we want to maintain as much as possible. - // The first is the Data State, which contains all of the recorded state - // we want to keep track of and to be able to relay at a moment's notice. - // The second is a state of the connected clients. This state should be - // able to be read from and written to indirectly by the clients. -} From c934c8e314804358bd5ebd1b6031f1f4c07ce991 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Thu, 25 Jul 2024 08:37:15 -0600 Subject: [PATCH 47/72] Remove sequencer from dependency for node-metrics --- Cargo.lock | 1 - node-metrics/Cargo.toml | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2311b0b5b..466e19abd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6454,7 +6454,6 @@ dependencies = [ "hotshot-types", "prometheus-parse", "reqwest 0.12.5", - "sequencer", "serde", "serde_json", "surf-disco", diff --git a/node-metrics/Cargo.toml b/node-metrics/Cargo.toml index f2b058641..cb89cad04 100644 --- a/node-metrics/Cargo.toml +++ b/node-metrics/Cargo.toml @@ -6,7 +6,7 @@ authors = { workspace = true } edition = { workspace = true } [features] -testing = ["sequencer/testing", "serde_json", "espresso-types/testing"] +testing = ["serde_json", "espresso-types/testing"] [dependencies] async-compatibility-layer = { workspace = true } @@ -20,7 +20,6 @@ hotshot-stake-table = { workspace = true } hotshot-types = { workspace = true } prometheus-parse = { version = "^0.2.5" } reqwest = { workspace = true } -sequencer = { path = "../sequencer" } serde = { workspace = true } serde_json = { version = "^1.0.113", optional = true } surf-disco = { workspace = true } From 02c30a6e0114a4f9b3659968183017a8c193c36c Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Thu, 25 Jul 2024 12:01:25 -0600 Subject: [PATCH 48/72] Refactor wrap async tasks in separate structures / impls In an effort to keep in line with similar task implementations, and for better organization and automatic cleanup on `drop`, the async task processing has been adjusted to be created and managed by a struct. 
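As a rough sketch of the shape these wrappers share (the type name and the body of the spawned loop below are illustrative placeholders, not the exact code in this diff), the constructor spawns the processing loop and retains its `JoinHandle`, and the `Drop` implementation takes the handle out of an `Option` so cancellation runs exactly once:

    use async_std::task::JoinHandle;
    use std::time::Duration;

    /// Wraps a spawned async task; dropping the wrapper cancels the task.
    pub struct ExampleProcessingTask {
        task_handle: Option<JoinHandle<()>>,
    }

    impl ExampleProcessingTask {
        /// Spawns the processing loop immediately and keeps its handle.
        pub fn new() -> Self {
            let task_handle = async_std::task::spawn(async {
                loop {
                    // Stand-in for consuming and processing a stream of events.
                    async_std::task::sleep(Duration::from_secs(1)).await;
                }
            });

            Self {
                task_handle: Some(task_handle),
            }
        }
    }

    impl Drop for ExampleProcessingTask {
        fn drop(&mut self) {
            // Take the handle so cancellation happens at most once, then
            // block until the task has actually been cancelled.
            if let Some(task_handle) = self.task_handle.take() {
                async_std::task::block_on(task_handle.cancel());
            }
        }
    }

Storing the handle in an `Option` lets `drop` take ownership of it through `&mut self`, which is what makes the blocking `cancel` call possible there.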
--- .../v0/create_node_validator_api.rs | 144 ++--- node-metrics/src/api/node_validator/v0/mod.rs | 287 +++++++--- node-metrics/src/service/client_state/mod.rs | 513 +++++++++++++----- node-metrics/src/service/data_state/mod.rs | 263 ++++++--- 4 files changed, 808 insertions(+), 399 deletions(-) diff --git a/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs b/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs index 1efe92c5d..1ff83f25c 100644 --- a/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs +++ b/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs @@ -1,22 +1,20 @@ use std::sync::Arc; use super::{ - get_stake_table_from_sequencer, process_node_identity_url_stream, - stream_leaves_from_hotshot_query_service, + get_stake_table_from_sequencer, ProcessNodeIdentityUrlStreamTask, ProcessProduceLeafStreamTask, }; use crate::service::{ client_id::ClientId, client_message::InternalClientMessage, client_state::{ - process_distribute_block_detail_handling_stream, - process_distribute_node_identity_handling_stream, - process_distribute_voters_handling_stream, process_internal_client_message_stream, - ClientThreadState, + ClientThreadState, InternalClientMessageProcessingTask, + ProcessDistributeBlockDetailHandlingTask, ProcessDistributeNodeIdentityHandlingTask, + ProcessDistributeVotersHandlingTask, }, - data_state::{process_leaf_stream, process_node_identity_stream, DataState}, + data_state::{DataState, ProcessLeafStreamTask, ProcessNodeIdentityStreamTask}, server_message::ServerMessage, }; -use async_std::{stream::StreamExt, sync::RwLock, task::JoinHandle}; +use async_std::sync::RwLock; use futures::{ channel::mpsc::{self, Receiver, Sender}, SinkExt, @@ -24,7 +22,14 @@ use futures::{ use url::Url; pub struct NodeValidatorAPI { - pub task_handles: Vec>, + pub process_internal_client_message_handle: Option, + pub process_distribute_block_detail_handle: Option, + pub process_distribute_node_identity_handle: Option, + pub process_distribute_voters_handle: Option, + pub process_leaf_stream_handle: Option, + pub process_node_identity_stream_handle: Option, + pub process_url_stream_handle: Option, + pub process_consume_leaves: Option, } pub struct NodeValidatorConfig { @@ -47,7 +52,7 @@ pub enum CreateNodeValidatorProcessingError { */ pub async fn create_node_validator_processing( config: NodeValidatorConfig, - server_message_receiver: Receiver>>, + internal_client_message_receiver: Receiver>>, ) -> Result { let mut data_state = DataState::new( Default::default(), @@ -64,9 +69,10 @@ pub async fn create_node_validator_processing( ClientId::from_count(1), ); - let client = surf_disco::Client::new(config.stake_table_url_base); + let client_stake_table = surf_disco::Client::new(config.stake_table_url_base.clone()); + let client_leaf_stream = surf_disco::Client::new(config.stake_table_url_base); - let stake_table = get_stake_table_from_sequencer(client.clone()) + let stake_table = get_stake_table_from_sequencer(client_stake_table) .await .map_err(CreateNodeValidatorProcessingError::FailedToGetStakeTable)?; @@ -81,85 +87,45 @@ pub async fn create_node_validator_processing( let (voters_sender, voters_receiver) = mpsc::channel(32); let (mut url_sender, url_receiver) = mpsc::channel(32); - let process_internal_client_message_handle = - async_std::task::spawn(process_internal_client_message_stream( - server_message_receiver, - data_state.clone(), - client_thread_state.clone(), - )); - - let process_distribute_block_detail_handle = - 
async_std::task::spawn(process_distribute_block_detail_handling_stream( - client_thread_state.clone(), - block_detail_receiver, - )); - - let process_distribute_node_identity_handle = - async_std::task::spawn(process_distribute_node_identity_handling_stream( - client_thread_state.clone(), - node_identity_receiver_2, - )); - - let process_distribute_voters_handle = async_std::task::spawn( - process_distribute_voters_handling_stream(client_thread_state.clone(), voters_receiver), + let process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + internal_client_message_receiver, + data_state.clone(), + client_thread_state.clone(), + ); + + let process_distribute_block_detail_handle = ProcessDistributeBlockDetailHandlingTask::new( + client_thread_state.clone(), + block_detail_receiver, ); - let process_leaf_stream_handle = async_std::task::spawn(process_leaf_stream( + let process_distribute_node_identity_handle = ProcessDistributeNodeIdentityHandlingTask::new( + client_thread_state.clone(), + node_identity_receiver_2, + ); + + let process_distribute_voters_handle = + ProcessDistributeVotersHandlingTask::new(client_thread_state.clone(), voters_receiver); + + let process_leaf_stream_handle = ProcessLeafStreamTask::new( leaf_receiver, data_state.clone(), block_detail_sender, voters_sender, - )); + ); - let process_node_identity_stream_handle = async_std::task::spawn(process_node_identity_stream( + let process_node_identity_stream_handle = ProcessNodeIdentityStreamTask::new( node_identity_receiver_1, data_state.clone(), node_identity_sender_2, - )); - - let process_url_stream_handle = async_std::task::spawn(process_node_identity_url_stream( - url_receiver, - node_identity_sender_1, - )); - - let leaf_retriever_handle = async_std::task::spawn(async move { - // Alright, let's start processing leaves - // TODO: We should move this into its own function that can respond - // and react appropriately when a service or sequencer does down - // so that it can gracefully re-establish the stream as necessary. - - let client = client; - - let mut leaf_stream = match stream_leaves_from_hotshot_query_service(None, client).await { - Ok(leaf_stream) => leaf_stream, - Err(err) => { - tracing::info!("error getting leaf stream: {}", err); - return; - } - }; - - let mut leaf_sender = leaf_sender; + ); - loop { - let leaf_result = leaf_stream.next().await; - let leaf = if let Some(Ok(leaf)) = leaf_result { - leaf - } else { - tracing::info!("leaf stream closed"); - break; - }; + let process_url_stream_handle = + ProcessNodeIdentityUrlStreamTask::new(url_receiver, node_identity_sender_1); - let leaf_send_result = leaf_sender.send(leaf).await; - if let Err(err) = leaf_send_result { - tracing::info!("leaf sender closed: {}", err); - break; - } - } - }); + let process_consume_leaves = ProcessProduceLeafStreamTask::new(client_leaf_stream, leaf_sender); - // send the original three node base urls - // This is assuming that demo-native is running, as such those Urls - // should be used / match + // Send any initial URLS to the url sender for immediate processing. 
+ // These URLs are supplied by the configuration of this function. { let urls = config.initial_node_public_base_urls; @@ -173,16 +139,14 @@ pub async fn create_node_validator_processing( } Ok(NodeValidatorAPI { - task_handles: vec![ - process_internal_client_message_handle, - process_distribute_block_detail_handle, - process_distribute_node_identity_handle, - process_distribute_voters_handle, - process_leaf_stream_handle, - process_node_identity_stream_handle, - process_url_stream_handle, - leaf_retriever_handle, - ], + process_internal_client_message_handle: Some(process_internal_client_message_handle), + process_distribute_block_detail_handle: Some(process_distribute_block_detail_handle), + process_distribute_node_identity_handle: Some(process_distribute_node_identity_handle), + process_distribute_voters_handle: Some(process_distribute_voters_handle), + process_leaf_stream_handle: Some(process_leaf_stream_handle), + process_node_identity_stream_handle: Some(process_node_identity_stream_handle), + process_url_stream_handle: Some(process_url_stream_handle), + process_consume_leaves: Some(process_consume_leaves), }) } @@ -257,8 +221,6 @@ mod test { app_serve_handle.await; - for handle in node_validator_task_state.task_handles { - handle.cancel().await; - } + drop(node_validator_task_state); } } diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs index 5cac91004..e960f8fbf 100644 --- a/node-metrics/src/api/node_validator/v0/mod.rs +++ b/node-metrics/src/api/node_validator/v0/mod.rs @@ -2,13 +2,16 @@ pub mod create_node_validator_api; use crate::service::client_message::{ClientMessage, InternalClientMessage}; use crate::service::data_state::{LocationDetails, NodeIdentity}; use crate::service::server_message::ServerMessage; -use espresso_types::FeeAccount; +use async_std::task::JoinHandle; +use espresso_types::{FeeAccount, SeqTypes}; +use futures::channel::mpsc::SendError; use futures::future::Either; -use futures::Sink; use futures::{ channel::mpsc::{self, Sender}, FutureExt, SinkExt, StreamExt, }; +use futures::{Sink, Stream}; +use hotshot_query_service::Leaf; use hotshot_stake_table::vec_based::StakeTable; use hotshot_types::light_client::{CircuitField, StateVerKey}; use hotshot_types::signature_key::BLSPubKey; @@ -405,53 +408,139 @@ pub async fn get_node_identity_from_url( } } -/// [stream_leaves_from_hotshot_query_service] retrieves a stream of -/// [sequencer::Leaf]s from the Hotshot Query Service. It expects a -/// [current_block_height] to be provided so that it can determine the starting -/// block height to begin streaming from. No matter what the value of -/// [current_block_height] is the stream will always check what the latest -/// block height is on the hotshot query service. It will then attempt to -/// pull as few Leaves as it needs from the stream. -pub async fn stream_leaves_from_hotshot_query_service( - current_block_height: Option, - client: surf_disco::Client, -) -> Result< - impl futures::Stream> + Unpin, - hotshot_query_service::Error, -> { - let block_height_result = client.get("status/block-height").send().await; - let block_height: u64 = match block_height_result { - Ok(block_height) => block_height, - Err(err) => { - tracing::info!("retrieve block height request failed: {}", err); - return Err(err); +/// [ProcessProduceLeafStreamTask] is a task that produces a stream of [Leaf]s +/// from the Hotshot Query Service.
It will attempt to retrieve the [Leaf]s +/// from the Hotshot Query Service and then send them to the [Sink] provided. +pub struct ProcessProduceLeafStreamTask { + pub task_handle: Option>, +} + +impl ProcessProduceLeafStreamTask { + /// [new] creates a new [ProcessProduceLeafStreamTask] that produces a + /// stream of [Leaf]s from the Hotshot Query Service. + /// + /// Calling this function will create an async task that will start + /// processing immediately. The task's handle will be stored in the + /// returned state. + pub fn new( + client: surf_disco::Client, + leaf_sender: K, + ) -> Self + where + K: Sink, Error = SendError> + Clone + Send + Sync + Unpin + 'static, + { + let task_handle = + async_std::task::spawn(Self::process_consume_leaf_stream(client, leaf_sender)); + + Self { + task_handle: Some(task_handle), } - }; + + /// [process_consume_leaf_stream] produces a stream of [Leaf]s from the + /// Hotshot Query Service. It will attempt to retrieve the [Leaf]s from the + /// Hotshot Query Service and then send them to the [Sink] provided. If the + /// [Sink] is closed, or if the Stream ends prematurely, then the function + /// will return. + async fn process_consume_leaf_stream( + client: surf_disco::Client, + leaf_sender: K, + ) where + K: Sink, Error = SendError> + Clone + Send + Sync + Unpin + 'static, + { + // Alright, let's start processing leaves + // TODO: We should move this into its own function that can respond + // and react appropriately when a service or sequencer goes down + // so that it can gracefully re-establish the stream as necessary. - let client = client; + let client = client; + let mut leaf_stream = + match Self::stream_leaves_from_hotshot_query_service(None, client).await { + Ok(leaf_stream) => leaf_stream, + Err(err) => { + tracing::info!("error getting leaf stream: {}", err); + return; + } + }; + let mut leaf_sender = leaf_sender; + loop { + let leaf_result = leaf_stream.next().await; + let leaf = if let Some(Ok(leaf)) = leaf_result { + leaf + } else { + tracing::info!("leaf stream closed"); + break; + }; + + let leaf_send_result = leaf_sender.send(leaf).await; + if let Err(err) = leaf_send_result { + tracing::info!("leaf sender closed: {}", err); + break; + } + } + } + + /// [stream_leaves_from_hotshot_query_service] retrieves a stream of + /// [sequencer::Leaf]s from the Hotshot Query Service. It expects a + /// [current_block_height] to be provided so that it can determine the starting + /// block height to begin streaming from. No matter what the value of + /// [current_block_height] is the stream will always check what the latest + /// block height is on the hotshot query service. It will then attempt to + /// pull as few Leaves as it needs from the stream.
+ async fn stream_leaves_from_hotshot_query_service( + current_block_height: Option, + client: surf_disco::Client, + ) -> Result< + impl futures::Stream> + Unpin, + hotshot_query_service::Error, + > { + let block_height_result = client.get("status/block-height").send().await; + let block_height: u64 = match block_height_result { + Ok(block_height) => block_height, + Err(err) => { + tracing::info!("retrieve block height request failed: {}", err); + return Err(err); + } + }; + + let latest_block_start = block_height.saturating_sub(50); + let start_block_height = if let Some(known_height) = current_block_height { + std::cmp::min(known_height, latest_block_start) + } else { + latest_block_start + }; + + let leaves_stream_result = client + .socket(&format!( + "availability/stream/leaves/{}", + start_block_height + )) + .subscribe::() + .await; + + let leaves_stream = match leaves_stream_result { + Ok(leaves_stream) => leaves_stream, + Err(err) => { + tracing::info!("retrieve leaves stream failed: {}", err); + return Err(err); + } + }; + + Ok(leaves_stream) + } +} - Ok(leaves_stream) +/// [Drop] implementation for [ProcessProduceLeafStreamTask] that will cancel +/// the task if it hasn't already been completed. +impl Drop for ProcessProduceLeafStreamTask { + fn drop(&mut self) { + if let Some(task_handle) = self.task_handle.take() { + async_std::task::block_on(task_handle.cancel()); + } + } } /// [populate_node_identity_general_from_scrape] populates the general @@ -683,48 +772,90 @@ pub fn node_identity_from_scrape(scrape: Scrape) -> Option { Some(node_identity) } -/// [process_node_identity_url_stream] processes a stream of [Url]s that are -/// expected to contain a Node Identity. It will attempt to retrieve the Node -/// Identity from the [Url] and then send it to the [Sink] provided. If the -/// [Sink] is closed, then the function will return. -pub async fn process_node_identity_url_stream( - node_identity_url_stream: T, - node_identity_sink: K, -) where - T: futures::Stream + Unpin, - K: Sink + Unpin, -{ - let mut node_identity_url_stream = node_identity_url_stream; - let mut node_identity_sender = node_identity_sink; - loop { - let node_identity_url_result = node_identity_url_stream.next().await; - let node_identity_url = match node_identity_url_result { - Some(node_identity_url) => node_identity_url, - None => { - tracing::info!("node identity url stream closed"); - return; - } - }; +/// [ProcessNodeIdentityUrlStreamTask] is a task that processes a stream of +/// [Url]s that are expected to contain a Node Identity. It will attempt to +/// retrieve the Node Identity from the [Url] and then send it to the [Sink] +/// provided. +pub struct ProcessNodeIdentityUrlStreamTask { + pub task_handle: Option>, +} + +impl ProcessNodeIdentityUrlStreamTask { + /// [new] creates a new [ProcessNodeIdentityUrlStreamTask] that processes a + /// stream of [Url]s that are expected to contain a Node Identity. + /// + /// Calling this function will spawn a new task that will start processing + /// immediately. The task's handle will be stored in the returned + /// state. + pub fn new(url_receiver: S, node_identity_sender: K) -> Self + where + S: Stream + Send + Sync + Unpin + 'static, + K: Sink + Clone + Send + Sync + Unpin + 'static, + { + let task_handle = async_std::task::spawn(Self::process_node_identity_url_stream( + url_receiver, + node_identity_sender, + )); - // Alright we have a new Url to try and scrape for a Node Identity. - // Let's attempt to do that.
- let node_identity_result = get_node_identity_from_url(node_identity_url).await; + Self { + task_handle: Some(task_handle), + } + } - let node_identity = match node_identity_result { - Ok(node_identity) => node_identity, - Err(err) => { - tracing::warn!("get node identity from url failed. bad base url?: {}", err); - continue; + /// [process_node_identity_url_stream] processes a stream of [Url]s that are + /// expected to contain a Node Identity. It will attempt to retrieve the Node + /// Identity from the [Url] and then send it to the [Sink] provided. If the + /// [Sink] is closed, then the function will return. + async fn process_node_identity_url_stream( + node_identity_url_stream: T, + node_identity_sink: K, + ) where + T: futures::Stream + Unpin, + K: Sink + Unpin, + { + let mut node_identity_url_stream = node_identity_url_stream; + let mut node_identity_sender = node_identity_sink; + loop { + let node_identity_url_result = node_identity_url_stream.next().await; + let node_identity_url = match node_identity_url_result { + Some(node_identity_url) => node_identity_url, + None => { + tracing::info!("node identity url stream closed"); + return; + } + }; + + // Alright we have a new Url to try and scrape for a Node Identity. + // Let's attempt to do that. + let node_identity_result = get_node_identity_from_url(node_identity_url).await; + + let node_identity = match node_identity_result { + Ok(node_identity) => node_identity, + Err(err) => { + tracing::warn!("get node identity from url failed. bad base url?: {}", err); + continue; + } + }; + + let send_result = node_identity_sender.send(node_identity).await; + if let Err(err) = send_result { + tracing::info!("node identity sender closed: {}", err); + return; } - }; + } + } +} - let send_result = node_identity_sender.send(node_identity).await; - if let Err(err) = send_result { - tracing::info!("node identity sender closed: {}", err); - return; +/// [ProcessNodeIdentityUrlStreamTask] will cancel the task when it is dropped. +impl Drop for ProcessNodeIdentityUrlStreamTask { + fn drop(&mut self) { + let task_handle = self.task_handle.take(); + if let Some(task_handle) = task_handle { + async_std::task::block_on(task_handle.cancel()); } } } + #[cfg(test)] mod tests { use espresso_types::FeeAccount; diff --git a/node-metrics/src/service/client_state/mod.rs b/node-metrics/src/service/client_state/mod.rs index 44213cbb9..b3826eaea 100644 --- a/node-metrics/src/service/client_state/mod.rs +++ b/node-metrics/src/service/client_state/mod.rs @@ -4,7 +4,10 @@ use super::{ data_state::{DataState, NodeIdentity}, server_message::ServerMessage, }; -use async_std::sync::{RwLock, RwLockWriteGuard}; +use async_std::{ + sync::{RwLock, RwLockWriteGuard}, + task::JoinHandle, +}; use bitvec::vec::BitVec; use espresso_types::SeqTypes; use futures::{channel::mpsc::SendError, Sink, SinkExt, Stream, StreamExt}; @@ -888,124 +891,309 @@ async fn handle_received_voters( drop_failed_client_sends(client_thread_state, failed_client_sends).await; } -/// [process_internal_client_message_stream] is a function that processes the -/// client handling stream. This stream is responsible for managing the state -/// of the connected clients, and their subscriptions. 
-pub async fn process_internal_client_message_stream( - mut stream: S, - data_state: Arc>, - client_thread_state: Arc>>, -) where - S: Stream> + Unpin, - K: Sink + Clone + Unpin, -{ - loop { - let message_result = stream.next().await; - let message = if let Some(message) = message_result { - message - } else { - tracing::info!("internal client message handler closed."); - return; - }; +/// InternalClientMessageProcessingTask represents an async task for +/// InternalClientMessages, and making the appropriate updates to the +/// [ClientThreadState] and [DataState]. +pub struct InternalClientMessageProcessingTask { + pub task_handle: Option>, +} - if let Err(err) = - process_client_message(message, data_state.clone(), client_thread_state.clone()).await - { - tracing::info!( - "internal client message processing encountered an error: {}", - err, - ); - return; +impl InternalClientMessageProcessingTask { + /// new creates a new [InternalClientMessageProcessingTask] with the + /// given internal_client_message_receiver, data_state, and + /// client_thread_state. + /// + /// Calling this function will start an async task that will start + /// processing. The handle for the async task is stored within the + /// returned state. + pub fn new( + internal_client_message_receiver: S, + data_state: Arc>, + client_thread_state: Arc>>, + ) -> Self + where + S: Stream> + Send + Sync + Unpin + 'static, + K: Sink + Clone + Send + Sync + Unpin + 'static, + { + let task_handle = async_std::task::spawn(Self::process_internal_client_message_stream( + internal_client_message_receiver, + data_state.clone(), + client_thread_state.clone(), + )); + + Self { + task_handle: Some(task_handle), + } + } + + /// [process_internal_client_message_stream] is a function that processes the + /// client handling stream. This stream is responsible for managing the state + /// of the connected clients, and their subscriptions. + async fn process_internal_client_message_stream( + mut stream: S, + data_state: Arc>, + client_thread_state: Arc>>, + ) where + S: Stream> + Unpin, + K: Sink + Clone + Unpin, + { + loop { + let message_result = stream.next().await; + let message = if let Some(message) = message_result { + message + } else { + tracing::info!("internal client message handler closed."); + return; + }; + + if let Err(err) = + process_client_message(message, data_state.clone(), client_thread_state.clone()) + .await + { + tracing::info!( + "internal client message processing encountered an error: {}", + err, + ); + return; + } } } } -/// [process_distribute_block_detail_handling_stream] is a function that -/// processes the the [Stream] of incoming [BlockDetail] and distributes them -/// to all subscribed clients. -pub async fn process_distribute_block_detail_handling_stream( - client_thread_state: Arc>>, - mut stream: S, -) where - S: Stream> + Unpin, - K: Sink + Clone + Unpin, -{ - loop { - let block_detail_result = stream.next().await; - - let block_detail = if let Some(block_detail) = block_detail_result { - block_detail - } else { - tracing::info!("block detail stream closed. shutting down client handling stream.",); - return; - }; +/// [drop] implementation for [InternalClientMessageProcessingTask] that will +/// cancel the task if it is still running. 
+impl Drop for InternalClientMessageProcessingTask { + fn drop(&mut self) { + let task_handle = self.task_handle.take(); + if let Some(task_handle) = task_handle { + async_std::task::block_on(task_handle.cancel()); + } + } +} + +/// [ProcessDistributeBlockDetailHandlingTask] represents an async task for +/// processing the incoming [BlockDetail] and distributing them to all +/// subscribed clients. +pub struct ProcessDistributeBlockDetailHandlingTask { + pub task_handle: Option>, +} + +impl ProcessDistributeBlockDetailHandlingTask { + /// [new] creates a new [ProcessDistributeBlockDetailHandlingTask] with the + /// given client_thread_state and block_detail_receiver. + /// + /// Calling this function will start an async task that will start + /// processing. The handle for the async task is stored within the + /// returned state. + pub fn new( + client_thread_state: Arc>>, + block_detail_receiver: S, + ) -> Self + where + S: Stream> + Send + Sync + Unpin + 'static, + K: Sink + Clone + Send + Sync + Unpin + 'static, + { + let task_handle = + async_std::task::spawn(Self::process_distribute_block_detail_handling_stream( + client_thread_state.clone(), + block_detail_receiver, + )); - handle_received_block_detail(client_thread_state.clone(), block_detail).await + Self { + task_handle: Some(task_handle), + } + } + + /// [process_distribute_block_detail_handling_stream] is a function that + /// processes the the [Stream] of incoming [BlockDetail] and distributes them + /// to all subscribed clients. + async fn process_distribute_block_detail_handling_stream( + client_thread_state: Arc>>, + mut stream: S, + ) where + S: Stream> + Unpin, + K: Sink + Clone + Unpin, + { + loop { + let block_detail_result = stream.next().await; + + let block_detail = if let Some(block_detail) = block_detail_result { + block_detail + } else { + tracing::info!( + "block detail stream closed. shutting down client handling stream.", + ); + return; + }; + + handle_received_block_detail(client_thread_state.clone(), block_detail).await + } } } -/// [process_distribute_node_identity_handling_stream] is a function that -/// processes the the [Stream] of incoming [NodeIdentity] and distributes them -/// to all subscribed clients. -pub async fn process_distribute_node_identity_handling_stream( - client_thread_state: Arc>>, - mut stream: S, -) where - S: Stream + Unpin, - K: Sink + Clone + Unpin, -{ - loop { - let node_identity_result = stream.next().await; - - let node_identity = if let Some(node_identity) = node_identity_result { - node_identity - } else { - tracing::info!("node identity stream closed. shutting down client handling stream.",); - return; - }; +/// [drop] implementation for [ProcessDistributeBlockDetailHandlingTask] that will +/// cancel the task if it is still running. +impl Drop for ProcessDistributeBlockDetailHandlingTask { + fn drop(&mut self) { + let task_handle = self.task_handle.take(); + if let Some(task_handle) = task_handle { + async_std::task::block_on(task_handle.cancel()); + } + } +} + +/// [ProcessDistributeNodeIdentityHandlingTask] represents an async task for +/// processing the incoming [NodeIdentity] and distributing them to all +/// subscribed clients. 
+pub struct ProcessDistributeNodeIdentityHandlingTask { + pub task_handle: Option>, +} - handle_received_node_identity(client_thread_state.clone(), node_identity).await +impl ProcessDistributeNodeIdentityHandlingTask { + /// [new] creates a new [ProcessDistributeNodeIdentityHandlingTask] with the + /// given client_thread_state and node_identity_receiver. + /// + /// Calling this function will start an async task that will start + /// processing. The handle for the async task is stored within the + /// returned state. + pub fn new( + client_thread_state: Arc>>, + node_identity_receiver: S, + ) -> Self + where + S: Stream + Send + Sync + Unpin + 'static, + K: Sink + Clone + Send + Sync + Unpin + 'static, + { + let task_handle = + async_std::task::spawn(Self::process_distribute_node_identity_handling_stream( + client_thread_state.clone(), + node_identity_receiver, + )); + + Self { + task_handle: Some(task_handle), + } + } + + /// [process_distribute_node_identity_handling_stream] is a function that + /// processes the the [Stream] of incoming [NodeIdentity] and distributes them + /// to all subscribed clients. + async fn process_distribute_node_identity_handling_stream( + client_thread_state: Arc>>, + mut stream: S, + ) where + S: Stream + Unpin, + K: Sink + Clone + Unpin, + { + loop { + let node_identity_result = stream.next().await; + + let node_identity = if let Some(node_identity) = node_identity_result { + node_identity + } else { + tracing::info!( + "node identity stream closed. shutting down client handling stream.", + ); + return; + }; + + handle_received_node_identity(client_thread_state.clone(), node_identity).await + } } } -/// [process_distribute_voters_handling_stream] is a function that processes -/// the the [Stream] of incoming [BitVec] and distributes them to all +/// [drop] implementation for [ProcessDistributeNodeIdentityHandlingTask] that +/// will cancel the task if it is still running. +impl Drop for ProcessDistributeNodeIdentityHandlingTask { + fn drop(&mut self) { + let task_handle = self.task_handle.take(); + if let Some(task_handle) = task_handle { + async_std::task::block_on(task_handle.cancel()); + } + } +} + +/// [ProcessDistributeVotersHandlingTask] represents an async task for +/// processing the incoming [BitVec] and distributing them to all /// subscribed clients. -pub async fn process_distribute_voters_handling_stream( - client_thread_state: Arc>>, - mut stream: S, -) where - S: Stream> + Unpin, - K: Sink + Clone + Unpin, -{ - loop { - let voters_result = stream.next().await; - - let voters = if let Some(voters) = voters_result { - voters - } else { - tracing::info!("voters stream closed. shutting down client handling stream.",); - return; - }; +pub struct ProcessDistributeVotersHandlingTask { + pub task_handle: Option>, +} + +impl ProcessDistributeVotersHandlingTask { + /// [new] creates a new [ProcessDistributeVotersHandlingTask] with the + /// given client_thread_state and voters_receiver. + /// + /// Calling this function will start an async task that will start + /// processing. The handle for the async task is stored within the + /// returned state. 
+ pub fn new( + client_thread_state: Arc>>, + voters_receiver: S, + ) -> Self + where + S: Stream> + Send + Sync + Unpin + 'static, + K: Sink + Clone + Send + Sync + Unpin + 'static, + { + let task_handle = async_std::task::spawn(Self::process_distribute_voters_handling_stream( + client_thread_state.clone(), + voters_receiver, + )); + + Self { + task_handle: Some(task_handle), + } + } + + /// [process_distribute_voters_handling_stream] is a function that processes + /// the the [Stream] of incoming [BitVec] and distributes them to all + /// subscribed clients. + async fn process_distribute_voters_handling_stream( + client_thread_state: Arc>>, + mut stream: S, + ) where + S: Stream> + Unpin, + K: Sink + Clone + Unpin, + { + loop { + let voters_result = stream.next().await; + + let voters = if let Some(voters) = voters_result { + voters + } else { + tracing::info!("voters stream closed. shutting down client handling stream.",); + return; + }; + + handle_received_voters(client_thread_state.clone(), voters).await + } + } +} - handle_received_voters(client_thread_state.clone(), voters).await +/// [drop] implementation for [ProcessDistributeVotersHandlingTask] that will +/// cancel the task if it is still running. +impl Drop for ProcessDistributeVotersHandlingTask { + fn drop(&mut self) { + let task_handle = self.task_handle.take(); + if let Some(task_handle) = task_handle { + async_std::task::block_on(task_handle.cancel()); + } } } #[cfg(test)] pub mod tests { - use super::{process_internal_client_message_stream, ClientThreadState}; + use super::{ClientThreadState, InternalClientMessageProcessingTask}; use crate::service::{ client_id::ClientId, client_message::InternalClientMessage, client_state::{ - process_distribute_block_detail_handling_stream, - process_distribute_node_identity_handling_stream, - process_distribute_voters_handling_stream, + ProcessDistributeBlockDetailHandlingTask, ProcessDistributeNodeIdentityHandlingTask, + ProcessDistributeVotersHandlingTask, }, data_state::{ - create_block_detail_from_leaf, process_leaf_stream, DataState, LocationDetails, - NodeIdentity, + create_block_detail_from_leaf, DataState, LocationDetails, NodeIdentity, + ProcessLeafStreamTask, }, server_message::ServerMessage, }; @@ -1103,18 +1291,20 @@ pub mod tests { let (mut internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); - let process_internal_client_message_handle: async_std::task::JoinHandle<()> = - async_std::task::spawn(process_internal_client_message_stream( - internal_client_message_receiver, - data_state, - client_thread_state, - )); + let mut process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + internal_client_message_receiver, + data_state, + client_thread_state, + ); // disconnect the last internal client message sender internal_client_message_sender.disconnect(); // Join the async task. 
if let Err(timeout_error) = process_internal_client_message_handle + .task_handle + .take() + .unwrap() .timeout(Duration::from_millis(200)) .await { @@ -1139,12 +1329,11 @@ pub mod tests { let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); - let process_internal_client_message_handle = - async_std::task::spawn(process_internal_client_message_stream( - internal_client_message_receiver, - data_state, - client_thread_state, - )); + let mut process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + internal_client_message_receiver, + data_state, + client_thread_state, + ); // Send a Connected Message to the server let mut internal_client_message_sender_1 = internal_client_message_sender.clone(); @@ -1199,6 +1388,9 @@ pub mod tests { assert_eq!(server_message_receiver_2.next().await, None); if let Err(timeout_error) = process_internal_client_message_handle + .task_handle + .take() + .unwrap() .timeout(Duration::from_millis(200)) .await { @@ -1226,12 +1418,11 @@ pub mod tests { let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); - let process_internal_client_message_handle = - async_std::task::spawn(process_internal_client_message_stream( - internal_client_message_receiver, - data_state, - client_thread_state, - )); + let mut process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + internal_client_message_receiver, + data_state, + client_thread_state, + ); // Send a Connected Message to the server let mut internal_client_message_sender_1 = internal_client_message_sender.clone(); @@ -1285,6 +1476,9 @@ pub mod tests { // Join the async task. if let Err(timeout_error) = process_internal_client_message_handle + .task_handle + .take() + .unwrap() .timeout(Duration::from_millis(200)) .await { @@ -1304,12 +1498,11 @@ pub mod tests { let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); - let process_internal_client_message_handle: async_std::task::JoinHandle<()> = - async_std::task::spawn(process_internal_client_message_stream( - internal_client_message_receiver, - data_state, - client_thread_state, - )); + let mut process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + internal_client_message_receiver, + data_state, + client_thread_state, + ); // Send a Connected Message to the server let mut internal_client_message_sender_1 = internal_client_message_sender.clone(); @@ -1370,6 +1563,9 @@ pub mod tests { // Join the async task. 
if let Err(timeout_error) = process_internal_client_message_handle + .task_handle + .take() + .unwrap() .timeout(Duration::from_millis(200)) .await { @@ -1393,29 +1589,27 @@ pub mod tests { let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); let (server_message_sender_3, mut server_message_receiver_3) = mpsc::channel(1); - let process_internal_client_message_handle = - async_std::task::spawn(process_internal_client_message_stream( - internal_client_message_receiver, - data_state.clone(), - client_thread_state.clone(), - )); + let mut process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + internal_client_message_receiver, + data_state.clone(), + client_thread_state.clone(), + ); - let process_distribute_block_detail_handle = - async_std::task::spawn(process_distribute_block_detail_handling_stream( + let mut process_distribute_block_detail_handle = + ProcessDistributeBlockDetailHandlingTask::new( client_thread_state.clone(), block_detail_receiver, - )); + ); - let process_distribute_voters_handle = async_std::task::spawn( - process_distribute_voters_handling_stream(client_thread_state, voters_receiver), - ); + let mut process_distribute_voters_handle = + ProcessDistributeVotersHandlingTask::new(client_thread_state, voters_receiver); - let process_leaf_stream_handle = async_std::task::spawn(process_leaf_stream( + let mut process_leaf_stream_handle = ProcessLeafStreamTask::new( leaf_receiver, data_state, block_detail_sender, voters_sender, - )); + ); // Send a Connected Message to the server let mut internal_client_message_sender_1 = internal_client_message_sender.clone(); @@ -1500,6 +1694,9 @@ pub mod tests { // Join the async task. if let Err(timeout_error) = process_leaf_stream_handle + .task_handle + .take() + .unwrap() .timeout(Duration::from_millis(200)) .await { @@ -1511,6 +1708,9 @@ pub mod tests { // Join the async task. if let Err(timeout_error) = process_distribute_block_detail_handle + .task_handle + .take() + .unwrap() .timeout(Duration::from_millis(200)) .await { @@ -1521,6 +1721,9 @@ pub mod tests { } if let Err(timeout_error) = process_distribute_voters_handle + .task_handle + .take() + .unwrap() .timeout(Duration::from_millis(200)) .await { @@ -1543,6 +1746,9 @@ pub mod tests { // Join the async task. 
if let Err(timeout_error) = process_internal_client_message_handle + .task_handle + .take() + .unwrap() .timeout(Duration::from_millis(200)) .await { @@ -1564,18 +1770,17 @@ pub mod tests { let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); let (server_message_sender_3, mut server_message_receiver_3) = mpsc::channel(1); - let process_internal_client_message_handle = - async_std::task::spawn(process_internal_client_message_stream( - internal_client_message_receiver, - data_state.clone(), - client_thread_state.clone(), - )); + let mut process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + internal_client_message_receiver, + data_state.clone(), + client_thread_state.clone(), + ); - let process_distribute_node_identity_handle = - async_std::task::spawn(process_distribute_node_identity_handling_stream( + let mut process_distribute_node_identity_handle = + ProcessDistributeNodeIdentityHandlingTask::new( client_thread_state, node_identity_receiver, - )); + ); // Send a Connected Message to the server let mut internal_client_message_sender_1 = internal_client_message_sender.clone(); @@ -1662,6 +1867,9 @@ pub mod tests { // Join the async task. if let Err(timeout_error) = process_distribute_node_identity_handle + .task_handle + .take() + .unwrap() .timeout(Duration::from_millis(200)) .await { @@ -1684,6 +1892,9 @@ pub mod tests { // Join the async task. if let Err(timeout_error) = process_internal_client_message_handle + .task_handle + .take() + .unwrap() .timeout(Duration::from_millis(200)) .await { @@ -1705,17 +1916,15 @@ pub mod tests { let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); let (server_message_sender_3, mut server_message_receiver_3) = mpsc::channel(1); - let process_internal_client_message_handle = - async_std::task::spawn(process_internal_client_message_stream( - internal_client_message_receiver, - data_state.clone(), - client_thread_state.clone(), - )); - - let process_distribute_voters_handle = async_std::task::spawn( - process_distribute_voters_handling_stream(client_thread_state, voters_receiver), + let mut process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + internal_client_message_receiver, + data_state.clone(), + client_thread_state.clone(), ); + let mut process_distribute_voters_handle = + ProcessDistributeVotersHandlingTask::new(client_thread_state, voters_receiver); + // Send a Connected Message to the server let mut internal_client_message_sender_1 = internal_client_message_sender.clone(); assert_eq!( @@ -1796,6 +2005,9 @@ pub mod tests { // Join the async task. if let Err(timeout_error) = process_distribute_voters_handle + .task_handle + .take() + .unwrap() .timeout(Duration::from_millis(200)) .await { @@ -1818,6 +2030,9 @@ pub mod tests { // Join the async task. 
if let Err(timeout_error) = process_internal_client_message_handle + .task_handle + .take() + .unwrap() .timeout(Duration::from_millis(200)) .await { diff --git a/node-metrics/src/service/data_state/mod.rs b/node-metrics/src/service/data_state/mod.rs index e05c06be4..2ea1caafd 100644 --- a/node-metrics/src/service/data_state/mod.rs +++ b/node-metrics/src/service/data_state/mod.rs @@ -1,7 +1,7 @@ pub mod location_details; pub mod node_identity; -use async_std::sync::RwLock; +use async_std::{sync::RwLock, task::JoinHandle}; use bitvec::vec::BitVec; use circular_buffer::CircularBuffer; use espresso_types::{Header, Payload, SeqTypes}; @@ -304,41 +304,89 @@ where Ok(()) } -/// [process_leaf_stream] allows for the consumption of a [Stream] when -/// attempting to process new incoming [Leaf]s. -pub async fn process_leaf_stream( - mut stream: S, - data_state: Arc>, - block_sender: BDSink, - voters_senders: BVSink, -) where - S: Stream> + Unpin, - Header: BlockHeader + QueryableHeader + ExplorerHeader, - Payload: BlockPayload, - BDSink: Sink, Error = SendError> + Clone + Unpin, - BVSink: Sink, Error = SendError> + Clone + Unpin, -{ - loop { - let leaf_result = stream.next().await; - let leaf = if let Some(leaf) = leaf_result { - leaf - } else { - // We have reached the end of the stream - tracing::info!("process leaf stream: end of stream reached for leaf stream."); - return; - }; +/// [ProcessLeafStreamTask] represents the task that is responsible for +/// processing a stream of incoming [Leaf]s. +pub struct ProcessLeafStreamTask { + pub task_handle: Option>, +} - if let Err(err) = process_incoming_leaf( - leaf, +impl ProcessLeafStreamTask { + /// [new] creates a new [ProcessLeafStreamTask] that will process a stream + /// of incoming [Leaf]s. + /// + /// Calling this function will create an asynchronous task that will start + /// processing immediately. The handle for the task will be stored within + /// the returned structure. + pub fn new( + leaf_receiver: S, + data_state: Arc>, + block_detail_sender: K1, + voters_sender: K2, + ) -> Self + where + S: Stream> + Send + Sync + Unpin + 'static, + K1: Sink, Error = SendError> + Clone + Send + Sync + Unpin + 'static, + K2: Sink, Error = SendError> + Clone + Send + Sync + Unpin + 'static, + { + let task_handle = async_std::task::spawn(Self::process_leaf_stream( + leaf_receiver, data_state.clone(), - block_sender.clone(), - voters_senders.clone(), - ) - .await - { - // We have an error that prevents us from continuing - tracing::info!("process leaf stream: error processing leaf: {}", err); - break; + block_detail_sender, + voters_sender, + )); + + Self { + task_handle: Some(task_handle), + } + } + + /// [process_leaf_stream] allows for the consumption of a [Stream] when + /// attempting to process new incoming [Leaf]s. 
+ async fn process_leaf_stream( + mut stream: S, + data_state: Arc>, + block_sender: BDSink, + voters_senders: BVSink, + ) where + S: Stream> + Unpin, + Header: BlockHeader + QueryableHeader + ExplorerHeader, + Payload: BlockPayload, + BDSink: Sink, Error = SendError> + Clone + Unpin, + BVSink: Sink, Error = SendError> + Clone + Unpin, + { + loop { + let leaf_result = stream.next().await; + let leaf = if let Some(leaf) = leaf_result { + leaf + } else { + // We have reached the end of the stream + tracing::info!("process leaf stream: end of stream reached for leaf stream."); + return; + }; + + if let Err(err) = process_incoming_leaf( + leaf, + data_state.clone(), + block_sender.clone(), + voters_senders.clone(), + ) + .await + { + // We have an error that prevents us from continuing + tracing::info!("process leaf stream: error processing leaf: {}", err); + break; + } + } + } +} + +/// [Drop] implementation for [ProcessLeafStreamTask] that will cancel the +/// task if it is dropped. +impl Drop for ProcessLeafStreamTask { + fn drop(&mut self) { + let task_handle = self.task_handle.take(); + if let Some(task_handle) = task_handle { + async_std::task::block_on(task_handle.cancel()); } } } @@ -393,53 +441,101 @@ where Ok(()) } -/// [process_node_identity_stream] allows for the consumption of a [Stream] when -/// attempting to process new incoming [NodeIdentity]s. -/// This function will process the incoming [NodeIdentity] and update the -/// [DataState] with the new information. -/// Additionally, the [NodeIdentity] will be sent to the [Sink] so that it can -/// be processed for real-time considerations. -pub async fn process_node_identity_stream( - mut stream: S, - data_state: Arc>, - node_identity_sender: NISink, -) where - S: Stream + Unpin, - NISink: Sink + Clone + Unpin, -{ - loop { - let node_identity_result = stream.next().await; - let node_identity = if let Some(node_identity) = node_identity_result { - node_identity - } else { - // We have reached the end of the stream - tracing::info!( - "process node identity stream: end of stream reached for node identity stream." - ); - return; - }; +/// [ProcessNodeIdentityStreamTask] represents the task that is responsible for +/// processing a stream of incoming [NodeIdentity]s and updating the [DataState] +/// with the new information. +pub struct ProcessNodeIdentityStreamTask { + pub task_handle: Option>, +} - if let Err(err) = process_incoming_node_identity( - node_identity, +impl ProcessNodeIdentityStreamTask { + /// [new] creates a new [ProcessNodeIdentityStreamTask] that will process a + /// stream of incoming [NodeIdentity]s. + /// + /// Calling this function will create an asynchronous task that will start + /// processing immediately. The handle for the task will be stored within + /// the returned structure. 
+ pub fn new( + node_identity_receiver: S, + data_state: Arc>, + node_identity_sender: K, + ) -> Self + where + S: Stream + Send + Sync + Unpin + 'static, + K: Sink + Clone + Send + Sync + Unpin + 'static, + { + let task_handle = async_std::task::spawn(Self::process_node_identity_stream( + node_identity_receiver, data_state.clone(), - node_identity_sender.clone(), - ) - .await - { - // We have an error that prevents us from continuing - tracing::info!( - "process node identity stream: error processing node identity: {}", - err - ); - break; + node_identity_sender, + )); + + Self { + task_handle: Some(task_handle), + } + } + + /// [process_node_identity_stream] allows for the consumption of a [Stream] when + /// attempting to process new incoming [NodeIdentity]s. + /// This function will process the incoming [NodeIdentity] and update the + /// [DataState] with the new information. + /// Additionally, the [NodeIdentity] will be sent to the [Sink] so that it can + /// be processed for real-time considerations. + async fn process_node_identity_stream( + mut stream: S, + data_state: Arc>, + node_identity_sender: NISink, + ) where + S: Stream + Unpin, + NISink: Sink + Clone + Unpin, + { + loop { + let node_identity_result = stream.next().await; + let node_identity = if let Some(node_identity) = node_identity_result { + node_identity + } else { + // We have reached the end of the stream + tracing::info!( + "process node identity stream: end of stream reached for node identity stream." + ); + return; + }; + + if let Err(err) = process_incoming_node_identity( + node_identity, + data_state.clone(), + node_identity_sender.clone(), + ) + .await + { + // We have an error that prevents us from continuing + tracing::info!( + "process node identity stream: error processing node identity: {}", + err + ); + break; + } + } + } +} + +/// [Drop] implementation for [ProcessNodeIdentityStreamTask] that will cancel +/// the task if it is dropped. 
+impl Drop for ProcessNodeIdentityStreamTask {
+    fn drop(&mut self) {
+        let task_handle = self.task_handle.take();
+        if let Some(task_handle) = task_handle {
+            async_std::task::block_on(task_handle.cancel());
+        }
+    }
+}
 
 #[cfg(test)]
 mod tests {
-    use super::{process_leaf_stream, DataState};
-    use crate::service::data_state::{process_node_identity_stream, LocationDetails, NodeIdentity};
+    use super::{DataState, ProcessLeafStreamTask};
+    use crate::service::data_state::{
+        LocationDetails, NodeIdentity, ProcessNodeIdentityStreamTask,
+    };
     use async_std::{prelude::FutureExt, sync::RwLock};
     use espresso_types::{
         BlockMerkleTree, ChainConfig, FeeAccount, FeeMerkleTree, Leaf, NodeState, ValidatedState,
@@ -477,12 +573,12 @@ mod tests {
         let (voters_sender, voters_receiver) = futures::channel::mpsc::channel(1);
         let (leaf_sender, leaf_receiver) = futures::channel::mpsc::channel(1);
 
-        let process_leaf_stream_task_handle = async_std::task::spawn(process_leaf_stream(
+        let mut process_leaf_stream_task_handle = ProcessLeafStreamTask::new(
             leaf_receiver,
             data_state.clone(),
             block_sender,
            voters_sender,
-        ));
+        );
 
         {
             let data_state = data_state.read().await;
@@ -530,6 +626,9 @@ mod tests {
 
         assert_eq!(
             process_leaf_stream_task_handle
+                .task_handle
+                .take()
+                .unwrap()
                 .timeout(Duration::from_millis(200))
                 .await,
             Ok(())
@@ -543,12 +642,11 @@ mod tests {
         let (node_identity_sender_1, node_identity_receiver_1) = futures::channel::mpsc::channel(1);
         let (node_identity_sender_2, node_identity_receiver_2) = futures::channel::mpsc::channel(1);
 
-        let process_node_identity_task_handle =
-            async_std::task::spawn(process_node_identity_stream(
-                node_identity_receiver_1,
-                data_state.clone(),
-                node_identity_sender_2,
-            ));
+        let mut process_node_identity_task_handle = ProcessNodeIdentityStreamTask::new(
+            node_identity_receiver_1,
+            data_state.clone(),
+            node_identity_sender_2,
+        );
 
         {
             let data_state = data_state.read().await;
@@ -661,6 +759,9 @@ mod tests {
 
         assert_eq!(
             process_node_identity_task_handle
+                .task_handle
+                .take()
+                .unwrap()
                 .timeout(Duration::from_millis(200))
                 .await,
             Ok(())

From 1cde72ef08530095e58e2815a4be212eace009b4 Mon Sep 17 00:00:00 2001
From: Theodore Schnepper
Date: Thu, 25 Jul 2024 14:09:28 -0600
Subject: [PATCH 49/72] Add optional processing of HotShot Events

In order to support the ability to process external messages, we need
the ability to process HotShot events. In general, we only really need
to know about RollCallResponse messages, as those tell us when a node
is responding to our RollCallRequest.

We also need the ability to send RollCallRequests so we can get the
responses, and to wait for any incoming new URLs to scan.
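To make the intended round trip concrete, here is a minimal sketch using
the `ExternalMessage` and `RollCallInfo` types introduced below; the
`publish` callback is a hypothetical stand-in for whatever transport
actually broadcasts the serialized bytes:

    // Sketch: how a node could answer an incoming roll call request.
    fn answer_roll_call(
        message_bytes: &[u8],
        my_public_api_url: url::Url,
        mut publish: impl FnMut(Vec<u8>),
    ) -> Result<(), bincode::Error> {
        // Only a RollCallRequest gets a reply; other messages are ignored here.
        if let ExternalMessage::RollCallRequest(_requester) =
            bincode::deserialize::<ExternalMessage>(message_bytes)?
        {
            let response = ExternalMessage::RollCallResponse(RollCallInfo {
                public_api_url: my_public_api_url,
            });
            publish(bincode::serialize(&response)?);
        }
        Ok(())
    }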
--- Cargo.lock | 1 + node-metrics/Cargo.toml | 1 + .../v0/create_node_validator_api.rs | 148 +++++++++++++++++- 3 files changed, 142 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 466e19abd..242fbc1db 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6444,6 +6444,7 @@ version = "0.1.0" dependencies = [ "async-compatibility-layer", "async-std", + "bincode", "bitvec", "circular-buffer", "espresso-types", diff --git a/node-metrics/Cargo.toml b/node-metrics/Cargo.toml index cb89cad04..338cbc069 100644 --- a/node-metrics/Cargo.toml +++ b/node-metrics/Cargo.toml @@ -11,6 +11,7 @@ testing = ["serde_json", "espresso-types/testing"] [dependencies] async-compatibility-layer = { workspace = true } async-std = { workspace = true } +bincode = { workspace = true } bitvec = { workspace = true } circular-buffer = { workspace = true } espresso-types = { path = "../types" } diff --git a/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs b/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs index 1ff83f25c..97a667a57 100644 --- a/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs +++ b/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs @@ -14,11 +14,14 @@ use crate::service::{ data_state::{DataState, ProcessLeafStreamTask, ProcessNodeIdentityStreamTask}, server_message::ServerMessage, }; -use async_std::sync::RwLock; +use async_std::{sync::RwLock, task::JoinHandle}; +use espresso_types::{PubKey, SeqTypes}; use futures::{ - channel::mpsc::{self, Receiver, Sender}, - SinkExt, + channel::mpsc::{self, Receiver, SendError, Sender}, + Sink, SinkExt, Stream, StreamExt, }; +use hotshot_types::event::{Event, EventType}; +use serde::{Deserialize, Serialize}; use url::Url; pub struct NodeValidatorAPI { @@ -30,10 +33,10 @@ pub struct NodeValidatorAPI { pub process_node_identity_stream_handle: Option, pub process_url_stream_handle: Option, pub process_consume_leaves: Option, + pub hotshot_event_processing_task: Option, } pub struct NodeValidatorConfig { - pub bind_address: String, pub stake_table_url_base: Url, pub initial_node_public_base_urls: Vec, } @@ -43,6 +46,97 @@ pub enum CreateNodeValidatorProcessingError { FailedToGetStakeTable(hotshot_query_service::Error), } +/// An external message that can be sent to or received from a node +#[derive(Serialize, Deserialize, Clone)] +pub enum ExternalMessage { + /// A request for a node to respond with its identifier + /// Contains the public key of the node that is requesting the roll call + RollCallRequest(PubKey), + + /// A response to a roll call request + /// Contains the identifier of the node + RollCallResponse(RollCallInfo), +} + +/// Information about a node that is used in a roll call response +#[derive(Serialize, Deserialize, Clone)] +pub struct RollCallInfo { + // The public API URL of the node + pub public_api_url: Url, +} + +pub struct HotShotEventProcessingTask { + pub task_handle: Option>, +} + +impl HotShotEventProcessingTask { + pub fn new(event_stream: S, url_sender: K) -> Self + where + S: Stream> + Send + Unpin + 'static, + K: Sink + Send + Unpin + 'static, + { + let task_handle = async_std::task::spawn(Self::process_messages(event_stream, url_sender)); + + Self { + task_handle: Some(task_handle), + } + } + + async fn process_messages(event_receiver: S, url_sender: K) + where + S: Stream> + Send + Unpin + 'static, + K: Sink + Unpin, + { + let mut event_stream = event_receiver; + let mut url_sender = url_sender; + loop { + let event_result = event_stream.next().await; 
+            let event = match event_result {
+                Some(event) => event,
+                None => {
+                    tracing::info!("event stream closed");
+                    break;
+                }
+            };
+
+            let Event { event, .. } = event;
+
+            let external_message_deserialize_result =
+                if let EventType::ExternalMessageReceived(external_message_bytes) = event {
+                    bincode::deserialize(&external_message_bytes)
+                } else {
+                    // Ignore all events that are not external messages
+                    continue;
+                };
+
+            let external_message: ExternalMessage = match external_message_deserialize_result {
+                Ok(external_message) => external_message,
+                Err(err) => {
+                    tracing::info!(
+                        "failed to deserialize external message, unrecognized: {}",
+                        err
+                    );
+                    continue;
+                }
+            };
+
+            let public_api_url = match external_message {
+                ExternalMessage::RollCallResponse(roll_call_response) => {
+                    roll_call_response.public_api_url
+                }
+                _ => continue,
+            };
+
+            // Send the discovered public URL to the sink
+            let send_result = url_sender.send(public_api_url).await;
+            if let Err(err) = send_result {
+                tracing::info!("url sender closed: {}", err);
+                break;
+            }
+        }
+    }
+}
+
 /**
  * create_node_validator_processing is a function that creates a node validator
  * processing environment. This function will create a number of tasks that
@@ -50,10 +144,17 @@ pub enum CreateNodeValidatorProcessingError {
  * the various sources. This function will also create the data state that
  * will be used to store the state of the network.
  */
-pub async fn create_node_validator_processing(
+pub async fn create_node_validator_processing<M, K>(
     config: NodeValidatorConfig,
     internal_client_message_receiver: Receiver<InternalClientMessage<Sender<ServerMessage>>>,
-) -> Result<NodeValidatorAPI, CreateNodeValidatorProcessingError> {
+    public_key: PubKey,
+    event_stream: Option<M>,
+    external_message_sink: Option<K>,
+) -> Result<NodeValidatorAPI, CreateNodeValidatorProcessingError>
+where
+    M: Stream<Item = Event<SeqTypes>> + Send + Unpin + 'static,
+    K: Sink<ExternalMessage, Error = SendError> + Send + Unpin + 'static,
+{
     let mut data_state = DataState::new(
         Default::default(),
         Default::default(),
@@ -124,6 +225,28 @@ pub async fn create_node_validator_processing(
     let process_consume_leaves = ProcessProduceLeafStreamTask::new(client_leaf_stream, leaf_sender);
 
+    let hotshot_event_processing_task = match (event_stream, external_message_sink) {
+        (Some(event_stream), Some(mut external_message_sink)) => {
+            let hotshot_event_processing_task =
+                HotShotEventProcessingTask::new(event_stream, url_sender.clone());
+
+            let send_roll_call_result = external_message_sink
+                .send(ExternalMessage::RollCallRequest(public_key))
+                .await;
+
+            if let Err(err) = send_roll_call_result {
+                tracing::info!("external message sink closed: {}", err);
+            }
+
+            Some(hotshot_event_processing_task)
+        }
+        _ => {
+            // It doesn't make sense to send out a RollCall message if we don't
+            // have the ability to receive the response.
+            None
+        }
+    };
+
     // Send any initial URLS to the url sender for immediate processing.
    // These URLs are supplied by the configuration of this function
    {
@@ -147,6 +270,7 @@ pub async fn create_node_validator_processing(
         process_node_identity_stream_handle: Some(process_node_identity_stream_handle),
         process_url_stream_handle: Some(process_url_stream_handle),
         process_consume_leaves: Some(process_consume_leaves),
+        hotshot_event_processing_task,
     })
 }
 
@@ -156,7 +280,9 @@ mod test {
     use crate::{
         api::node_validator::v0::{StateClientMessageSender, STATIC_VER_0_1},
         service::{client_message::InternalClientMessage, server_message::ServerMessage},
     };
+    use espresso_types::PubKey;
     use futures::channel::mpsc::{self, Sender};
+    use hotshot_types::traits::signature_key::BuilderSignatureKey;
     use tide_disco::App;
 
     struct TestState(Sender<InternalClientMessage<Sender<ServerMessage>>>);
@@ -172,7 +298,6 @@ mod test {
     async fn test_full_setup_example() {
         let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(32);
         let state = TestState(internal_client_message_sender);
-        // let state = Arc::new(state);
 
         let mut app: App<_, crate::api::node_validator::v0::Error> = App::with_state(state);
         let node_validator_api_result = super::super::define_api::<TestState>();
@@ -190,9 +315,12 @@ mod test {
             }
         }
 
+        let public_key = PubKey::generated_from_seed_indexed([0; 32], 0).0;
+        let (external_message_sender, _external_message_receiver) = mpsc::channel(10);
+        let (_event_sender, event_receiver) = mpsc::channel(10);
+
         let node_validator_task_state = match super::create_node_validator_processing(
             super::NodeValidatorConfig {
-                bind_address: "0.0.0.0:9000".to_string(),
                 stake_table_url_base: "http://localhost:24000/v0".parse().unwrap(),
                 initial_node_public_base_urls: vec![
                     "http://localhost:24000/".parse().unwrap(),
                     "http://localhost:24001/".parse().unwrap(),
                     "http://localhost:24002/".parse().unwrap(),
                     "http://localhost:24003/".parse().unwrap(),
                     "http://localhost:24004/".parse().unwrap(),
                 ],
             },
             internal_client_message_receiver,
+            public_key,
+            Some(event_receiver),
+            Some(external_message_sender),
         )
         .await
         {
@@ -218,6 +349,7 @@ mod test {
         let app_serve_handle = async_std::task::spawn(async move {
             let app_serve_result = app.serve("0.0.0.0:9000", STATIC_VER_0_1).await;
             tracing::info!("app serve result: {:?}", app_serve_result);
         });
+        tracing::info!("now listening on port 9000");
 
         app_serve_handle.await;

From 8b5c74d264e8549a79f658496d1c96f1794de2dc Mon Sep 17 00:00:00 2001
From: Theodore Schnepper
Date: Thu, 25 Jul 2024 14:42:34 -0600
Subject: [PATCH 50/72] Remove leaf retrieval from `create_node_validator_processing` call

With the ability to potentially process HotShot Events via the async
task `HotShotEventProcessingTask`, we technically could have access to
the `Leaf` nodes via `Decide` events. This can be used as an
alternative to retrieving the `Leaf` from the HotShot Query Service, as
is done by the `ProcessProduceLeafStreamTask` async task.

As such, it doesn't make sense to impose one over the other in the call
to `create_node_validator_processing` anymore. So we remove it, in
order to allow the consumer to choose whichever mode they prefer
instead. This allows for flexibility when it comes to how we source
this information.
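Concretely, the caller now owns the leaf channel and decides what feeds it.
A sketch of the two wiring options (`event_stream`, `url_sender`, `config`,
and the internal client message receiver are assumed to already be in
scope):

    // The consumer picks the leaf source and hands over the receiving end.
    let (leaf_sender, leaf_receiver) = futures::channel::mpsc::channel(32);

    // Option 1: forward leaves out of HotShot `Decide` events.
    let _event_task =
        HotShotEventProcessingTask::new(event_stream, url_sender, leaf_sender);
    // Option 2: stream them from a query service instead, via
    // `ProcessProduceLeafStreamTask`, whichever source the deployment has.

    let node_validator = create_node_validator_processing(
        config,
        internal_client_message_receiver,
        leaf_receiver,
    )
    .await?;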
--- .../v0/create_node_validator_api.rs | 284 ++++++++++++------ 1 file changed, 197 insertions(+), 87 deletions(-) diff --git a/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs b/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs index 97a667a57..cfcc3238d 100644 --- a/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs +++ b/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs @@ -1,8 +1,6 @@ use std::sync::Arc; -use super::{ - get_stake_table_from_sequencer, ProcessNodeIdentityUrlStreamTask, ProcessProduceLeafStreamTask, -}; +use super::{get_stake_table_from_sequencer, ProcessNodeIdentityUrlStreamTask}; use crate::service::{ client_id::ClientId, client_message::InternalClientMessage, @@ -20,6 +18,7 @@ use futures::{ channel::mpsc::{self, Receiver, SendError, Sender}, Sink, SinkExt, Stream, StreamExt, }; +use hotshot_query_service::Leaf; use hotshot_types::event::{Event, EventType}; use serde::{Deserialize, Serialize}; use url::Url; @@ -32,8 +31,6 @@ pub struct NodeValidatorAPI { pub process_leaf_stream_handle: Option, pub process_node_identity_stream_handle: Option, pub process_url_stream_handle: Option, - pub process_consume_leaves: Option, - pub hotshot_event_processing_task: Option, } pub struct NodeValidatorConfig { @@ -65,30 +62,54 @@ pub struct RollCallInfo { pub public_api_url: Url, } +/// [HotShotEventProcessingTask] is a task that is capable of processing events +/// that are coming in from a HotShot event stream. This task will keep an +/// eye out for ExternalMessageReceived events that can be decoded as a +/// RollCallResponse. When a RollCallResponse is received, the public API URL +/// of the node that sent the message will be sent to the provided url_sender. +/// +/// Additionally, this can receive Decide events and send the discovered leaves +/// to the provided leaf_sender. This can can be used as a means of receiving +/// leaves that doesn't involve hitting an external service like the task +/// [ProcessProduceLeafStreamTask] does. pub struct HotShotEventProcessingTask { pub task_handle: Option>, } impl HotShotEventProcessingTask { - pub fn new(event_stream: S, url_sender: K) -> Self + /// [new] creates a new [HotShotEventProcessingTask] that will process + /// events from the provided event_stream. + /// + /// Calls to [new] will spawn a new task that will start processing + /// immediately. The task handle will be stored in the returned structure. + pub fn new(event_stream: S, url_sender: K1, leaf_sender: K2) -> Self where S: Stream> + Send + Unpin + 'static, - K: Sink + Send + Unpin + 'static, + K1: Sink + Send + Unpin + 'static, + K2: Sink, Error = SendError> + Send + Unpin + 'static, { - let task_handle = async_std::task::spawn(Self::process_messages(event_stream, url_sender)); + let task_handle = async_std::task::spawn(Self::process_messages( + event_stream, + url_sender, + leaf_sender, + )); Self { task_handle: Some(task_handle), } } - async fn process_messages(event_receiver: S, url_sender: K) + /// [process_messages] is a function that will process messages from the + /// provided event stream. 
+ async fn process_messages(event_receiver: S, url_sender: K1, leaf_sender: K2) where S: Stream> + Send + Unpin + 'static, - K: Sink + Unpin, + K1: Sink + Unpin, + K2: Sink, Error = SendError> + Unpin, { let mut event_stream = event_receiver; let mut url_sender = url_sender; + let mut leaf_sender = leaf_sender; loop { let event_result = event_stream.next().await; let event = match event_result { @@ -101,42 +122,151 @@ impl HotShotEventProcessingTask { let Event { event, .. } = event; - let external_message_deserialize_result = - if let EventType::ExternalMessageReceived(external_message_bytes) = event { - bincode::deserialize(&external_message_bytes) - } else { - // Ignore all events that are not external messages - continue; - }; - - let external_message: ExternalMessage = match external_message_deserialize_result { - Ok(external_message) => external_message, - Err(err) => { - tracing::info!( - "failed to deserialize external message, unrecognized: {}", - err - ); + match event { + EventType::Decide { leaf_chain, .. } => { + for leaf_info in leaf_chain.iter() { + let leaf = leaf_info.leaf.clone(); + + let send_result = leaf_sender.send(leaf).await; + if let Err(err) = send_result { + tracing::info!("leaf sender closed: {}", err); + return; + } + } + } + + EventType::ExternalMessageReceived(external_message_bytes) => { + let roll_call_info = match bincode::deserialize(&external_message_bytes) { + Ok(ExternalMessage::RollCallResponse(roll_call_info)) => roll_call_info, + + Err(err) => { + tracing::info!( + "failed to deserialize external message, unrecognized: {}", + err + ); + continue; + } + + _ => { + // Ignore any other potentially recognized messages + continue; + } + }; + + let public_api_url = roll_call_info.public_api_url; + + // Send the the discovered public url to the sink + let send_result = url_sender.send(public_api_url).await; + if let Err(err) = send_result { + tracing::info!("url sender closed: {}", err); + return; + } + } + _ => { + // Ignore all other events continue; } - }; + } + } + } +} + +/// [Drop] implementation for [HotShotEventProcessingTask] that will cancel the +/// task when the structure is dropped. +impl Drop for HotShotEventProcessingTask { + fn drop(&mut self) { + if let Some(task_handle) = self.task_handle.take() { + async_std::task::block_on(task_handle.cancel()); + } + } +} - let public_api_url = match external_message { - ExternalMessage::RollCallResponse(roll_call_response) => { - roll_call_response.public_api_url +/// [ProcessExternalMessageHandlingTask] is a task that is capable of processing +/// external messages that are coming in from an external message stream. This +/// task will keep an eye out for ExternalMessageReceived events that can be +/// decoded as a RollCallResponse. When a RollCallResponse is received, the +/// public API URL of the node that sent the message will be sent to the +/// provided url_sender. +/// +/// This task can be used as a means of processing [ExternalMessage]s that are +/// not being provided by a HotShot event stream. It can be used as an +/// alternative to the [HotShotEventProcessingTask] for processing external +/// messages. +pub struct ProcessExternalMessageHandlingTask { + pub task_handle: Option>, +} + +impl ProcessExternalMessageHandlingTask { + /// [new] creates a new [ProcessExternalMessageHandlingTask] that will + /// process external messages from the provided external_message_receiver. + /// + /// Calls to [new] will spawn a new task that will start processing + /// immediately. 
The task handle will be stored in the returned structure. + pub fn new(external_message_receiver: S, url_sender: K) -> Self + where + S: Stream + Send + Unpin + 'static, + K: Sink + Send + Unpin + 'static, + { + let task_handle = async_std::task::spawn(Self::process_external_messages( + external_message_receiver, + url_sender, + )); + + Self { + task_handle: Some(task_handle), + } + } + + /// [process_external_messages] is a function that will process messages from + /// the provided external message stream. + async fn process_external_messages(external_message_receiver: S, url_sender: K) + where + S: Stream + Send + Unpin + 'static, + K: Sink + Send + Unpin + 'static, + { + let mut external_message_receiver = external_message_receiver; + let mut url_sender = url_sender; + + loop { + let external_message_result = external_message_receiver.next().await; + let external_message = match external_message_result { + Some(external_message) => external_message, + None => { + tracing::info!("external message receiver closed"); + break; } - _ => continue, }; - // Send the the discovered public url to the sink - let send_result = url_sender.send(public_api_url).await; - if let Err(err) = send_result { - tracing::info!("url sender closed: {}", err); - break; + match external_message { + ExternalMessage::RollCallResponse(roll_call_info) => { + let public_api_url = roll_call_info.public_api_url; + + let send_result = url_sender.send(public_api_url).await; + if let Err(err) = send_result { + tracing::info!("url sender closed: {}", err); + break; + } + } + + _ => { + // Ignore all other messages + continue; + } } } } } +/// [Drop] implementation for [ProcessExternalMessageHandlingTask] that will +/// cancel the task when the structure is dropped. +impl Drop for ProcessExternalMessageHandlingTask { + fn drop(&mut self) { + if let Some(task_handle) = self.task_handle.take() { + async_std::task::block_on(task_handle.cancel()); + } + } +} + /** * create_node_validator_processing is a function that creates a node validator * processing environment. This function will create a number of tasks that @@ -144,17 +274,11 @@ impl HotShotEventProcessingTask { * the various sources. This function will also create the data state that * will be used to store the state of the network. 
*/ -pub async fn create_node_validator_processing( +pub async fn create_node_validator_processing( config: NodeValidatorConfig, internal_client_message_receiver: Receiver>>, - public_key: PubKey, - event_stream: Option, - external_message_sink: Option, -) -> Result -where - M: Stream> + Send + Unpin + 'static, - K: Sink + Send + Unpin + 'static, -{ + leaf_receiver: Receiver>, +) -> Result { let mut data_state = DataState::new( Default::default(), Default::default(), @@ -171,7 +295,6 @@ where ); let client_stake_table = surf_disco::Client::new(config.stake_table_url_base.clone()); - let client_leaf_stream = surf_disco::Client::new(config.stake_table_url_base); let stake_table = get_stake_table_from_sequencer(client_stake_table) .await @@ -182,7 +305,6 @@ where let data_state = Arc::new(RwLock::new(data_state)); let client_thread_state = Arc::new(RwLock::new(client_thread_state)); let (block_detail_sender, block_detail_receiver) = mpsc::channel(32); - let (leaf_sender, leaf_receiver) = mpsc::channel(32); let (node_identity_sender_1, node_identity_receiver_1) = mpsc::channel(32); let (node_identity_sender_2, node_identity_receiver_2) = mpsc::channel(32); let (voters_sender, voters_receiver) = mpsc::channel(32); @@ -223,30 +345,6 @@ where let process_url_stream_handle = ProcessNodeIdentityUrlStreamTask::new(url_receiver, node_identity_sender_1); - let process_consume_leaves = ProcessProduceLeafStreamTask::new(client_leaf_stream, leaf_sender); - - let hotshot_event_processing_task = match (event_stream, external_message_sink) { - (Some(event_stream), Some(mut external_message_sink)) => { - let hotshot_event_processing_task = - HotShotEventProcessingTask::new(event_stream, url_sender.clone()); - - let send_roll_call_result = external_message_sink - .send(ExternalMessage::RollCallRequest(public_key)) - .await; - - if let Err(err) = send_roll_call_result { - tracing::info!("external message sink closed: {}", err); - } - - Some(hotshot_event_processing_task) - } - _ => { - // It doesn't make sne to send out a RollCall message if we don't - // have the ability to receive the response. - None - } - }; - // Send any initial URLS to the url sender for immediate processing. 
    // These URLs are supplied by the configuration of this function
    {
@@ -269,20 +367,18 @@ pub async fn create_node_validator_processing(
         process_leaf_stream_handle: Some(process_leaf_stream_handle),
         process_node_identity_stream_handle: Some(process_node_identity_stream_handle),
         process_url_stream_handle: Some(process_url_stream_handle),
-        process_consume_leaves: Some(process_consume_leaves),
-        hotshot_event_processing_task,
     })
 }
 
 #[cfg(test)]
 mod test {
     use crate::{
-        api::node_validator::v0::{StateClientMessageSender, STATIC_VER_0_1},
+        api::node_validator::v0::{
+            ProcessProduceLeafStreamTask, StateClientMessageSender, STATIC_VER_0_1,
+        },
         service::{client_message::InternalClientMessage, server_message::ServerMessage},
     };
-    use espresso_types::PubKey;
     use futures::channel::mpsc::{self, Sender};
-    use hotshot_types::traits::signature_key::BuilderSignatureKey;
     use tide_disco::App;
 
     struct TestState(Sender<InternalClientMessage<Sender<ServerMessage>>>);
@@ -411,25 +411,38 @@ mod test {
             }
         }
 
-        let public_key = PubKey::generated_from_seed_indexed([0; 32], 0).0;
-        let (external_message_sender, _external_message_receiver) = mpsc::channel(10);
-        let (_event_sender, event_receiver) = mpsc::channel(10);
+        let (leaf_sender, leaf_receiver) = mpsc::channel(10);
+
+        let client_leaf_stream = surf_disco::Client::new(
+            "https://query.cappuccino.testnet.espresso.network/v0"
+                .parse()
+                .unwrap(),
+        );
+        let process_consume_leaves =
+            ProcessProduceLeafStreamTask::new(client_leaf_stream, leaf_sender);
 
         let node_validator_task_state = match super::create_node_validator_processing(
             super::NodeValidatorConfig {
-                stake_table_url_base: "http://localhost:24000/v0".parse().unwrap(),
+                stake_table_url_base: "https://query.cappuccino.testnet.espresso.network/v0"
+                    .parse()
+                    .unwrap(),
                 initial_node_public_base_urls: vec![
-                    "http://localhost:24000/".parse().unwrap(),
-                    "http://localhost:24001/".parse().unwrap(),
-                    "http://localhost:24002/".parse().unwrap(),
-                    "http://localhost:24003/".parse().unwrap(),
-                    "http://localhost:24004/".parse().unwrap(),
+                    "https://query-1.cappuccino.testnet.espresso.network/"
+                        .parse()
+                        .unwrap(),
+                    "https://query-2.cappuccino.testnet.espresso.network/"
+                        .parse()
+                        .unwrap(),
+                    "https://query-3.cappuccino.testnet.espresso.network/"
+                        .parse()
+                        .unwrap(),
+                    "https://query-4.cappuccino.testnet.espresso.network/"
+                        .parse()
+                        .unwrap(),
                 ],
             },
             internal_client_message_receiver,
-            public_key,
-            Some(event_receiver),
-            Some(external_message_sender),
+            leaf_receiver,
         )
         .await
         {
 
         let app_serve_handle = async_std::task::spawn(async move {
             let app_serve_result = app.serve("0.0.0.0:9000", STATIC_VER_0_1).await;
             tracing::info!("app serve result: {:?}", app_serve_result);
         });
         tracing::info!("now listening on port 9000");
 
         app_serve_handle.await;
 
         drop(node_validator_task_state);
+        drop(process_consume_leaves);
     }
 }

From 0951f9f6b0e63baaa5846ca0c2536040003df00d Mon Sep 17 00:00:00 2001
From: Theodore Schnepper
Date: Fri, 26 Jul 2024 15:51:54 -0600
Subject: [PATCH 51/72] Add run_standalone_service call

Add main

In order for the node-validator API to run as an independent service,
it needs its own main function. This is implemented with the assumption
that it can connect to the CDN in order to publish and receive RollCall
events. Additionally, it is assumed that some initial known node URLs
can be provided immediately to aid in initial node data population, if
necessary.

The leaves are also sourced from a HotShot Query Service availability
API.
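With that, the standalone binary's entry point can stay tiny. Roughly (a
sketch only: the exact CLI options type and the signature of
`run_standalone_service` are assumptions here, and `#[async_std::main]`
requires async-std's "attributes" feature):

    use clap::Parser;

    #[async_std::main]
    async fn main() {
        // Hypothetical clap-derived options: CDN endpoint, query service
        // base URL, and any initially known node URLs.
        let options = node_metrics::Options::parse();
        node_metrics::run_standalone_service(options).await;
    }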
--- Cargo.lock | 2 + node-metrics/Cargo.toml | 2 + .../v0/create_node_validator_api.rs | 22 +- node-metrics/src/api/node_validator/v0/mod.rs | 200 ++++++++++------ node-metrics/src/lib.rs | 217 ++++++++++++++++++ node-metrics/src/main.rs | 11 + 6 files changed, 369 insertions(+), 85 deletions(-) create mode 100644 node-metrics/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index 242fbc1db..43f2d93dc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6447,8 +6447,10 @@ dependencies = [ "bincode", "bitvec", "circular-buffer", + "clap", "espresso-types", "futures", + "hotshot", "hotshot-query-service", "hotshot-stake-table", "hotshot-testing", diff --git a/node-metrics/Cargo.toml b/node-metrics/Cargo.toml index 338cbc069..ec5206092 100644 --- a/node-metrics/Cargo.toml +++ b/node-metrics/Cargo.toml @@ -14,8 +14,10 @@ async-std = { workspace = true } bincode = { workspace = true } bitvec = { workspace = true } circular-buffer = { workspace = true } +clap = { workspace = true } espresso-types = { path = "../types" } futures = { workspace = true } +hotshot = { workspace = true } hotshot-query-service = { workspace = true } hotshot-stake-table = { workspace = true } hotshot-types = { workspace = true } diff --git a/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs b/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs index cfcc3238d..50c5e1293 100644 --- a/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs +++ b/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs @@ -23,7 +23,7 @@ use hotshot_types::event::{Event, EventType}; use serde::{Deserialize, Serialize}; use url::Url; -pub struct NodeValidatorAPI { +pub struct NodeValidatorAPI { pub process_internal_client_message_handle: Option, pub process_distribute_block_detail_handle: Option, pub process_distribute_node_identity_handle: Option, @@ -31,6 +31,7 @@ pub struct NodeValidatorAPI { pub process_leaf_stream_handle: Option, pub process_node_identity_stream_handle: Option, pub process_url_stream_handle: Option, + pub url_sender: K, } pub struct NodeValidatorConfig { @@ -278,7 +279,7 @@ pub async fn create_node_validator_processing( config: NodeValidatorConfig, internal_client_message_receiver: Receiver>>, leaf_receiver: Receiver>, -) -> Result { +) -> Result>, CreateNodeValidatorProcessingError> { let mut data_state = DataState::new( Default::default(), Default::default(), @@ -367,6 +368,7 @@ pub async fn create_node_validator_processing( process_leaf_stream_handle: Some(process_leaf_stream_handle), process_node_identity_stream_handle: Some(process_node_identity_stream_handle), process_url_stream_handle: Some(process_url_stream_handle), + url_sender: url_sender.clone(), }) } @@ -374,7 +376,8 @@ pub async fn create_node_validator_processing( mod test { use crate::{ api::node_validator::v0::{ - ProcessProduceLeafStreamTask, StateClientMessageSender, STATIC_VER_0_1, + HotshotQueryServiceLeafStreamRetriever, ProcessProduceLeafStreamTask, + StateClientMessageSender, STATIC_VER_0_1, }, service::{client_message::InternalClientMessage, server_message::ServerMessage}, }; @@ -413,13 +416,14 @@ mod test { let (leaf_sender, leaf_receiver) = mpsc::channel(10); - let client_leaf_stream = surf_disco::Client::new( - "https://query.cappuccino.testnet.espresso.network/v0" - .parse() - .unwrap(), + let process_consume_leaves = ProcessProduceLeafStreamTask::new( + HotshotQueryServiceLeafStreamRetriever::new( + "https://query.cappuccino.testnet.espresso.network/v0" + .parse() + .unwrap(), + ), + 
leaf_sender, ); - let process_consume_leaves = - ProcessProduceLeafStreamTask::new(client_leaf_stream, leaf_sender); let node_validator_task_state = match super::create_node_validator_processing( super::NodeValidatorConfig { diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs index e960f8fbf..10afa751d 100644 --- a/node-metrics/src/api/node_validator/v0/mod.rs +++ b/node-metrics/src/api/node_validator/v0/mod.rs @@ -8,9 +8,8 @@ use futures::channel::mpsc::SendError; use futures::future::Either; use futures::{ channel::mpsc::{self, Sender}, - FutureExt, SinkExt, StreamExt, + FutureExt, Sink, SinkExt, Stream, StreamExt, }; -use futures::{Sink, Stream}; use hotshot_query_service::Leaf; use hotshot_stake_table::vec_based::StakeTable; use hotshot_types::light_client::{CircuitField, StateVerKey}; @@ -20,7 +19,9 @@ use hotshot_types::PeerConfig; use prometheus_parse::{Sample, Scrape}; use serde::{Deserialize, Serialize}; use std::fmt; +use std::future::Future; use std::io::BufRead; +use std::pin::Pin; use std::str::FromStr; use tide_disco::socket::Connection; use tide_disco::{api::ApiError, Api}; @@ -402,12 +403,111 @@ pub async fn get_node_identity_from_url( let scrape = prometheus_parse::Scrape::parse(buffered_response.lines())?; if let Some(node_identity) = node_identity_from_scrape(scrape) { + let mut node_identity = node_identity; + node_identity.public_url = Some(url); Ok(node_identity) } else { Err(GetNodeIdentityFromUrlError::NoNodeIdentity) } } +/// LeafStreamRetriever is a general trait that allows for the retrieval of a +/// list of Leaves from a source. The specific implementation doesn't care about +/// the source, only that it is able to retrieve a stream of Leaves. +/// +/// This allows us to swap the implementation of the [LeafStreamRetriever] for +/// testing purposes, or for newer sources in the future. +pub trait LeafStreamRetriever: Send { + type Item; + type ItemError: std::error::Error + Send; + type Error: std::error::Error + Send; + type Stream: Stream> + Send + Unpin; + type Future: Future> + Send; + + /// [retrieve_stream] retrieves a stream of [Leaf]s from the source. It + /// expects the current block height to be provided so that it can determine + /// the starting block height to retrieve the stream of [Leaf]s from. + /// + /// It should check the current height of the chain so that it only needs + /// to retrieve the number of older blocks that are needed, instead of + /// starting from the beginning of time. + fn retrieve_stream(&self, current_block_height: Option) -> Self::Future; +} + +/// [HotshotQueryServiceLeafStreamRetriever] is a [LeafStreamRetriever] that +/// retrieves a stream of [Leaf]s from the Hotshot Query Service. It expects +/// the base URL of the Hotshot Query Service to be provided so that it can +/// make the request to the Hotshot Query Service. +pub struct HotshotQueryServiceLeafStreamRetriever { + base_url: Url, +} + +impl HotshotQueryServiceLeafStreamRetriever { + /// [new] creates a new [HotshotQueryServiceLeafStreamRetriever] that + /// will use the given base [Url] to be able to retrieve the stream of + /// [Leaf]s from the Hotshot Query Service. + /// + /// The [Url] is expected to point to the the API version root of the + /// Hotshot Query Service. 
Example: + /// https://example.com/v0 + pub fn new(base_url: Url) -> Self { + Self { base_url } + } +} + +impl LeafStreamRetriever for HotshotQueryServiceLeafStreamRetriever { + type Item = Leaf; + type ItemError = hotshot_query_service::Error; + type Error = hotshot_query_service::Error; + type Stream = surf_disco::socket::Connection< + Leaf, + surf_disco::socket::Unsupported, + Self::ItemError, + Version01, + >; + type Future = Pin> + Send>>; + + fn retrieve_stream(&self, current_block_height: Option) -> Self::Future { + let client = surf_disco::Client::new(self.base_url.clone()); + async move { + let block_height_result = client.get("status/block-height").send().await; + let block_height: u64 = match block_height_result { + Ok(block_height) => block_height, + Err(err) => { + tracing::info!("retrieve block height request failed: {}", err); + return Err(err); + } + }; + + let latest_block_start = block_height.saturating_sub(50); + let start_block_height = if let Some(known_height) = current_block_height { + std::cmp::min(known_height, latest_block_start) + } else { + latest_block_start + }; + + let leaves_stream_result = client + .socket(&format!( + "availability/stream/leaves/{}", + start_block_height + )) + .subscribe::() + .await; + + let leaves_stream = match leaves_stream_result { + Ok(leaves_stream) => leaves_stream, + Err(err) => { + tracing::info!("retrieve leaves stream failed: {}", err); + return Err(err); + } + }; + + Ok(leaves_stream) + } + .boxed() + } +} + /// [ProcessProduceLeafStreamTask] is a task that produce a stream of [Leaf]s /// from the Hotshot Query Service. It will attempt to retrieve the [Leaf]s /// from the Hotshot Query Service and then send them to the [Sink] provided. @@ -422,15 +522,16 @@ impl ProcessProduceLeafStreamTask { /// Calling this function will create an async task that will start /// processing immediately. The task's handle will be stored in the /// returned state. - pub fn new( - client: surf_disco::Client, - leaf_sender: K, - ) -> Self + pub fn new(leaf_stream_retriever: R, leaf_sender: K) -> Self where + R: LeafStreamRetriever> + 'static, K: Sink, Error = SendError> + Clone + Send + Sync + Unpin + 'static, { - let task_handle = - async_std::task::spawn(Self::process_consume_leaf_stream(client, leaf_sender)); + // let future = Self::process_consume_leaf_stream(leaf_stream_retriever, leaf_sender); + let task_handle = async_std::task::spawn(Self::process_consume_leaf_stream( + leaf_stream_retriever, + leaf_sender, + )); Self { task_handle: Some(task_handle), @@ -442,32 +543,28 @@ impl ProcessProduceLeafStreamTask { /// Hotshot Query Service and then send them to the [Sink] provided. If the /// [Sink] is closed, or if the Stream ends prematurely, then the function /// will return. - async fn process_consume_leaf_stream( - client: surf_disco::Client, - leaf_sender: K, - ) where + async fn process_consume_leaf_stream(leaf_stream_retriever: R, leaf_sender: K) + where + R: LeafStreamRetriever>, K: Sink, Error = SendError> + Clone + Send + Sync + Unpin + 'static, { // Alright, let's start processing leaves - // TODO: We should move this into its own function that can respond - // and react appropriately when a service or sequencer does down - // so that it can gracefully re-establish the stream as necessary. 
- - let client = client; - - let mut leaf_stream = - match Self::stream_leaves_from_hotshot_query_service(None, client).await { - Ok(leaf_stream) => leaf_stream, - Err(err) => { - tracing::info!("error getting leaf stream: {}", err); - return; - } - }; + // TODO: implement retry logic with backoff and ultimately fail if + // unable to retrieve the stream within a time frame. + let leaves_stream_result = leaf_stream_retriever.retrieve_stream(None).await; + let leaves_stream = match leaves_stream_result { + Ok(leaves_stream) => leaves_stream, + Err(err) => { + tracing::info!("retrieve leaves stream failed: {}", err); + return; + } + }; let mut leaf_sender = leaf_sender; + let mut leaves_stream = leaves_stream; loop { - let leaf_result = leaf_stream.next().await; + let leaf_result = leaves_stream.next().await; let leaf = if let Some(Ok(leaf)) = leaf_result { leaf } else { @@ -482,55 +579,6 @@ impl ProcessProduceLeafStreamTask { } } } - - /// [stream_leaves_from_hotshot_query_service] retrieves a stream of - /// [sequencer::Leaf]s from the Hotshot Query Service. It expects a - /// [current_block_height] to be provided so that it can determine the starting - /// block height to begin streaming from. No matter what the value of - /// [current_block_height] is the stream will always check what the latest - /// block height is on the hotshot query service. It will then attempt to - /// pull as few Leaves as it needs from the stream. - async fn stream_leaves_from_hotshot_query_service( - current_block_height: Option, - client: surf_disco::Client, - ) -> Result< - impl futures::Stream> + Unpin, - hotshot_query_service::Error, - > { - let block_height_result = client.get("status/block-height").send().await; - let block_height: u64 = match block_height_result { - Ok(block_height) => block_height, - Err(err) => { - tracing::info!("retrieve block height request failed: {}", err); - return Err(err); - } - }; - - let latest_block_start = block_height.saturating_sub(50); - let start_block_height = if let Some(known_height) = current_block_height { - std::cmp::min(known_height, latest_block_start) - } else { - latest_block_start - }; - - let leaves_stream_result = client - .socket(&format!( - "availability/stream/leaves/{}", - start_block_height - )) - .subscribe::() - .await; - - let leaves_stream = match leaves_stream_result { - Ok(leaves_stream) => leaves_stream, - Err(err) => { - tracing::info!("retrieve leaves stream failed: {}", err); - return Err(err); - } - }; - - Ok(leaves_stream) - } } /// [Drop] implementation for [ProcessConsumeLeafStreamTask] that will cancel diff --git a/node-metrics/src/lib.rs b/node-metrics/src/lib.rs index b091aa0b3..faeccc5d0 100644 --- a/node-metrics/src/lib.rs +++ b/node-metrics/src/lib.rs @@ -97,3 +97,220 @@ pub mod api; pub mod service; + +use crate::{ + api::node_validator::v0::{ + create_node_validator_api::{create_node_validator_processing, NodeValidatorConfig}, + HotshotQueryServiceLeafStreamRetriever, ProcessProduceLeafStreamTask, + StateClientMessageSender, STATIC_VER_0_1, + }, + service::{client_message::InternalClientMessage, server_message::ServerMessage}, +}; +use api::node_validator::v0::create_node_validator_api::ExternalMessage; +use clap::Parser; +use espresso_types::{PubKey, SeqTypes}; +use futures::{ + channel::mpsc::{self, Sender}, + SinkExt, +}; +use hotshot::traits::implementations::{ + CdnMetricsValue, PushCdnNetwork, Topic, WrappedSignatureKey, +}; +use hotshot_query_service::metrics::PrometheusMetrics; +use 
hotshot_types::traits::{network::ConnectedNetwork, signature_key::BuilderSignatureKey}; +use tide_disco::App; +use url::Url; + +/// Options represents the configuration options that are available for running +/// the node validator service via the [run_standalone_service] function. +/// These options are configurable via command line arguments or environment +/// variables. +#[derive(Parser, Clone, Debug)] +pub struct Options { + #[clap(long, env = "ESPRESSO_NODE_VALIDATOR_STAKE_TABLE_SOURCE_BASE_URL")] + stake_table_source_base_url: Url, + + #[clap(long, env = "ESPRESSO_NODE_VALIDATOR_LEAF_STREAM_SOURCE_BASE_URL")] + leaf_stream_base_url: Url, + + #[clap( + long, + env = "ESPRESSO_NODE_VALIDATOR_INITIAL_NODE_PUBLIC_BASE_URLS", + value_delimiter = ',' + )] + initial_node_public_base_urls: Vec, + + #[clap( + long, + value_parser, + env = "ESPRESSO_NODE_VALIDATOR_PORT", + default_value = "9000" + )] + port: u16, + + #[clap(long, env = "ESPRESSO_NODE_VALIDATOR_CDN_MARSHAL_ENDPOINT")] + cdn_marshal_endpoint: String, +} + +impl Options { + fn stake_table_source_base_url(&self) -> &Url { + &self.stake_table_source_base_url + } + + fn leaf_stream_base_url(&self) -> &Url { + &self.leaf_stream_base_url + } + + fn initial_node_public_base_urls(&self) -> &[Url] { + &self.initial_node_public_base_urls + } + + fn port(&self) -> u16 { + self.port + } + + fn cdn_marshal_endpoint(&self) -> &str { + &self.cdn_marshal_endpoint + } +} + +/// MainState represents the State of the application this is available to +/// tide_disco. +struct MainState { + internal_client_message_sender: Sender>>, +} + +impl StateClientMessageSender> for MainState { + fn sender(&self) -> Sender>> { + self.internal_client_message_sender.clone() + } +} + +/// Run the service by itself. +/// +/// This function will run the node validator as its own service. It has some +/// options that allow it to be configured in order for it to operate +/// effectively. 
+pub async fn run_standalone_service(options: Options) { + let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(32); + let state = MainState { + internal_client_message_sender, + }; + + let mut app: App<_, api::node_validator::v0::Error> = App::with_state(state); + let node_validator_api_result = api::node_validator::v0::define_api(); + let node_validator_api = match node_validator_api_result { + Ok(node_validator_api) => node_validator_api, + Err(err) => { + panic!("error defining node validator api: {:?}", err); + } + }; + + match app.register_module("node-validator", node_validator_api) { + Ok(_) => {} + Err(err) => { + panic!("error registering node validator api: {:?}", err); + } + } + + let (leaf_sender, leaf_receiver) = mpsc::channel(10); + + let process_consume_leaves = ProcessProduceLeafStreamTask::new( + HotshotQueryServiceLeafStreamRetriever::new(options.leaf_stream_base_url().clone()), + leaf_sender, + ); + + let node_validator_task_state = match create_node_validator_processing( + NodeValidatorConfig { + stake_table_url_base: options.stake_table_source_base_url().clone(), + initial_node_public_base_urls: options.initial_node_public_base_urls().to_vec(), + }, + internal_client_message_receiver, + leaf_receiver, + ) + .await + { + Ok(node_validator_task_state) => node_validator_task_state, + + Err(err) => { + panic!("error defining node validator api: {:?}", err); + } + }; + + let (public_key, private_key) = PubKey::generated_from_seed_indexed([1; 32], 0); + let cdn_network_result = PushCdnNetwork::::new( + options.cdn_marshal_endpoint().to_string(), + vec![Topic::Global], + hotshot::traits::implementations::KeyPair { + public_key: WrappedSignatureKey(public_key), + private_key: private_key.clone(), + }, + CdnMetricsValue::new(&PrometheusMetrics::default()), + ); + let cdn_network = match cdn_network_result { + Ok(cdn_network) => cdn_network, + Err(err) => { + panic!("error creating cdn network: {:?}", err); + } + }; + + let url_sender = node_validator_task_state.url_sender.clone(); + + let cdn_task_handle = async_std::task::spawn(async move { + let mut url_sender = url_sender; + + loop { + let messages_result = cdn_network.recv_msgs().await; + let messages = match messages_result { + Ok(message) => message, + Err(err) => { + tracing::error!("error receiving message: {:?}", err); + continue; + } + }; + + for message in messages { + // We want to try and decode this message. + let message_deserialize_result = bincode::deserialize::(&message); + let external_message = match message_deserialize_result { + Ok(external_message) => external_message, + Err(err) => { + tracing::error!("error deserializing message: {:?}", err); + continue; + } + }; + + match external_message { + ExternalMessage::RollCallResponse(roll_call_info) => { + let public_api_url = roll_call_info.public_api_url; + + // We have a public api url, so we can process this url. 
+ + if let Err(err) = url_sender.send(public_api_url).await { + tracing::error!("error sending public api url: {:?}", err); + } + } + + _ => { + // We're not concerned about other message types + } + } + } + } + }); + + let port = options.port(); + // We would like to wait until being signaled + let app_serve_handle = async_std::task::spawn(async move { + let app_serve_result = app.serve(format!("0.0.0.0:{}", port), STATIC_VER_0_1).await; + tracing::info!("app serve result: {:?}", app_serve_result); + }); + + tracing::info!("now listening on port {:?}", port); + + app_serve_handle.await; + + drop(cdn_task_handle); + drop(node_validator_task_state); + drop(process_consume_leaves); +} diff --git a/node-metrics/src/main.rs b/node-metrics/src/main.rs new file mode 100644 index 000000000..0b02ee0e3 --- /dev/null +++ b/node-metrics/src/main.rs @@ -0,0 +1,11 @@ +use async_compatibility_layer::logging::{setup_backtrace, setup_logging}; +use clap::Parser; +use node_metrics::{run_standalone_service, Options}; + +#[async_std::main] +async fn main() { + setup_logging(); + setup_backtrace(); + + run_standalone_service(Options::parse()).await; +} From e7c9ed1489f43447f117df8abbd8f53937461879 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Tue, 30 Jul 2024 07:33:58 -0600 Subject: [PATCH 52/72] Refactor separate CDN tasks, and make optional The CDN processing tasks are define as an inline anonymous code block / function that is defined entirely within the `run_standalone_service`. Additionally, these processes are not optional. Following discussions with @jbearer at the Front End Weekly meeting, the CDN should probably not be utilized as it acts as a hard-barrier that prevents third-parties from being able to build their own dashboards using the data we are providing. As such, the background tasks have been moved into their own types for tracking, and the CDN configuration for them has been made entirely optional. --- .../src/api/node_validator/v0/cdn/mod.rs | 189 ++++++++++++++++++ node-metrics/src/api/node_validator/v0/mod.rs | 2 + node-metrics/src/lib.rs | 137 +++++++------ 3 files changed, 259 insertions(+), 69 deletions(-) create mode 100644 node-metrics/src/api/node_validator/v0/cdn/mod.rs diff --git a/node-metrics/src/api/node_validator/v0/cdn/mod.rs b/node-metrics/src/api/node_validator/v0/cdn/mod.rs new file mode 100644 index 000000000..fe7eaeca9 --- /dev/null +++ b/node-metrics/src/api/node_validator/v0/cdn/mod.rs @@ -0,0 +1,189 @@ +use crate::api::node_validator::v0::create_node_validator_api::ExternalMessage; +use async_std::task::JoinHandle; +use espresso_types::{PubKey, SeqTypes}; +use futures::{channel::mpsc::SendError, Sink, SinkExt}; +use hotshot::{traits::implementations::PushCdnNetwork, types::Message}; +use hotshot_types::{ + message::{MessageKind, VersionedMessage}, + traits::network::{BroadcastDelay, ConnectedNetwork}, +}; +use url::Url; + +/// CdnReceiveMessagesTask represents a task that is responsible for receiving +/// messages from the CDN network and processing them. +/// This task is primarily concerned with recording responses to RollCall +/// requests, and forwarding any discovered public API URLs to the URL sender. +pub struct CdnReceiveMessagesTask { + task_handle: Option>, +} + +impl CdnReceiveMessagesTask { + /// Creates a new `CdnReceiveMessagesTask` with the given network and + /// URL sender. Calling this function will create an async task that + /// will begin executing immediately. The handle for the task will + /// be in the returned structure. 
+ pub fn new(network: PushCdnNetwork, url_sender: K) -> Self + where + K: Sink + Clone + Send + Unpin + 'static, + { + let task_handle = async_std::task::spawn(Self::process_cdn_messages(network, url_sender)); + Self { + task_handle: Some(task_handle), + } + } + + /// [process_cdn_messages] is the function that will begin consuming + /// messages off of the CDN, and start handling them. + /// + /// At the moment, this only looks for and recognizes + /// [MessageKind::External] messages, and attempts to decode + /// [ExternalMessage] from those contained pieces of data. Though, in the + /// future this may be able to be expanded to other things. + async fn process_cdn_messages(network: PushCdnNetwork, url_sender: K) + where + K: Sink + Clone + Send + Unpin + 'static, + { + network.wait_for_ready().await; + let mut url_sender = url_sender; + + loop { + let messages_result = network.recv_msgs().await; + let messages = match messages_result { + Ok(message) => message, + Err(err) => { + tracing::error!("error receiving message: {:?}", err); + continue; + } + }; + + for message in messages { + // We want to try and decode this message. + let message_deserialize_result = Message::::deserialize(&message, &None); + + let message = match message_deserialize_result { + Ok(message) => message, + Err(err) => { + tracing::error!("error deserializing message: {:?}", err); + continue; + } + }; + + let external_message_deserialize_result = match message.kind { + MessageKind::External(external_message) => { + bincode::deserialize::(&external_message) + } + _ => { + tracing::error!("unexpected message kind: {:?}", message); + continue; + } + }; + + let external_message = match external_message_deserialize_result { + Ok(external_message) => external_message, + Err(err) => { + tracing::error!("error deserializing message: {:?}", err); + continue; + } + }; + + match external_message { + ExternalMessage::RollCallResponse(roll_call_info) => { + let public_api_url = roll_call_info.public_api_url; + + // We have a public api url, so we can process this url. + + if let Err(err) = url_sender.send(public_api_url).await { + tracing::error!("error sending public api url: {:?}", err); + } + } + + _ => { + // We're not concerned about other message types + } + } + } + } + } +} + +impl Drop for CdnReceiveMessagesTask { + fn drop(&mut self) { + if let Some(task_handle) = self.task_handle.take() { + async_std::task::block_on(task_handle.cancel()); + } + } +} + +/// BroadcastRollCallTask represents a task that is responsible for broadcasting +/// a RollCallRequest to the CDN network. +pub struct BroadcastRollCallTask { + task_handle: Option>, +} + +impl BroadcastRollCallTask { + /// Creates a new `BroadcastRollCallTask` with the given network and + /// public key. Calling this function will create an async task that + /// will begin executing immediately. The handle for the task will + /// be in the returned structure. + /// + /// This task only performs one action, and then returns. It is not + /// long-lived. + pub fn new(network: PushCdnNetwork, public_key: PubKey) -> Self { + let task_handle = async_std::task::spawn(Self::broadcast_roll_call(network, public_key)); + Self { + task_handle: Some(task_handle), + } + } + + /// [broadcast_roll_call] is the function that will broadcast a + /// RollCallRequest to the CDN network in order to request responses from + /// the rest of the network participants, so we can collect the public API + /// URLs in the message consuming task. 
+ async fn broadcast_roll_call(network: PushCdnNetwork, public_key: PubKey) { + network.wait_for_ready().await; + + // We want to send the Roll Call Request + let rollcall_request = ExternalMessage::RollCallRequest(public_key); + let rollcall_request_serialized = match bincode::serialize(&rollcall_request) { + Ok(rollcall_request_serialized) => rollcall_request_serialized, + Err(err) => { + tracing::error!("error serializing rollcall request: {:?}", err); + return; + } + }; + + let hotshot_message = Message:: { + sender: public_key, + kind: MessageKind::External(rollcall_request_serialized), + }; + + let hotshot_message_serialized = match hotshot_message.serialize(&None) { + Ok(hotshot_message_serialized) => hotshot_message_serialized, + Err(err) => { + tracing::error!("error serializing hotshot message: {:?}", err); + return; + } + }; + + let broadcast_result = network + .broadcast_message( + hotshot_message_serialized, + Default::default(), + BroadcastDelay::None, + ) + .await; + if let Err(err) = broadcast_result { + tracing::error!("error broadcasting rollcall request: {:?}", err); + } + + tracing::info!("broadcast roll call request completed"); + } +} + +impl Drop for BroadcastRollCallTask { + fn drop(&mut self) { + if let Some(task_handle) = self.task_handle.take() { + async_std::task::block_on(task_handle.cancel()); + } + } +} diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs index 10afa751d..94cdecfe3 100644 --- a/node-metrics/src/api/node_validator/v0/mod.rs +++ b/node-metrics/src/api/node_validator/v0/mod.rs @@ -1,4 +1,6 @@ +pub mod cdn; pub mod create_node_validator_api; + use crate::service::client_message::{ClientMessage, InternalClientMessage}; use crate::service::data_state::{LocationDetails, NodeIdentity}; use crate::service::server_message::ServerMessage; diff --git a/node-metrics/src/lib.rs b/node-metrics/src/lib.rs index faeccc5d0..cb006ea64 100644 --- a/node-metrics/src/lib.rs +++ b/node-metrics/src/lib.rs @@ -100,24 +100,21 @@ pub mod service; use crate::{ api::node_validator::v0::{ + cdn::{BroadcastRollCallTask, CdnReceiveMessagesTask}, create_node_validator_api::{create_node_validator_processing, NodeValidatorConfig}, HotshotQueryServiceLeafStreamRetriever, ProcessProduceLeafStreamTask, StateClientMessageSender, STATIC_VER_0_1, }, service::{client_message::InternalClientMessage, server_message::ServerMessage}, }; -use api::node_validator::v0::create_node_validator_api::ExternalMessage; use clap::Parser; use espresso_types::{PubKey, SeqTypes}; -use futures::{ - channel::mpsc::{self, Sender}, - SinkExt, -}; +use futures::channel::mpsc::{self, Sender}; use hotshot::traits::implementations::{ CdnMetricsValue, PushCdnNetwork, Topic, WrappedSignatureKey, }; use hotshot_query_service::metrics::PrometheusMetrics; -use hotshot_types::traits::{network::ConnectedNetwork, signature_key::BuilderSignatureKey}; +use hotshot_types::traits::signature_key::BuilderSignatureKey; use tide_disco::App; use url::Url; @@ -127,12 +124,37 @@ use url::Url; /// variables. #[derive(Parser, Clone, Debug)] pub struct Options { + /// stake_table_source_based_url is the base URL for the config API + /// endpoint that is provided by Espresso Sequencers. + /// + /// This endpoint is expected to point to the version root path of the + /// URL. 
+ /// Example: + /// - https://query.cappuccino.testnet.espresso.network/v0/ #[clap(long, env = "ESPRESSO_NODE_VALIDATOR_STAKE_TABLE_SOURCE_BASE_URL")] stake_table_source_base_url: Url, + /// leaf_stream_base_url is the base URL for the availability API endpoint + /// that is capable of providing a stream of leaf data. + /// + /// This endpoint is expected to point to the version root path of the + /// URL. + /// Example: + /// - https://query.cappuccino.testnet.espresso.network/v0/ + /// #[clap(long, env = "ESPRESSO_NODE_VALIDATOR_LEAF_STREAM_SOURCE_BASE_URL")] leaf_stream_base_url: Url, + /// initial_node_public_base_urls is a list of URLs that are the initial + /// public base URLs of the nodes that are in the network. These can be + /// supplied as an initial source of URLS to scrape for node identity. + /// + /// These urls are expected to point to the root path of the URL for the + /// node, and are expected to be URLS that support the status endpoint + /// for the nodes. + /// + /// Example URL: + /// - https://query-1.cappuccino.testnet.espresso.network/ #[clap( long, env = "ESPRESSO_NODE_VALIDATOR_INITIAL_NODE_PUBLIC_BASE_URLS", @@ -140,6 +162,9 @@ pub struct Options { )] initial_node_public_base_urls: Vec, + /// port is the port that the node validator service will listen on. + /// This port is expected to be a valid port number that is available + /// for the service to bind to. #[clap( long, value_parser, @@ -148,8 +173,12 @@ pub struct Options { )] port: u16, + /// cdn_marshal_endpoint is the endpoint for the CDN marshal service. + /// + /// This endpoint is optional, and if it is not provided, then the CDN + /// service will not be utilized. #[clap(long, env = "ESPRESSO_NODE_VALIDATOR_CDN_MARSHAL_ENDPOINT")] - cdn_marshal_endpoint: String, + cdn_marshal_endpoint: Option, } impl Options { @@ -169,7 +198,7 @@ impl Options { self.port } - fn cdn_marshal_endpoint(&self) -> &str { + fn cdn_marshal_endpoint(&self) -> &Option { &self.cdn_marshal_endpoint } } @@ -237,67 +266,35 @@ pub async fn run_standalone_service(options: Options) { } }; - let (public_key, private_key) = PubKey::generated_from_seed_indexed([1; 32], 0); - let cdn_network_result = PushCdnNetwork::::new( - options.cdn_marshal_endpoint().to_string(), - vec![Topic::Global], - hotshot::traits::implementations::KeyPair { - public_key: WrappedSignatureKey(public_key), - private_key: private_key.clone(), - }, - CdnMetricsValue::new(&PrometheusMetrics::default()), - ); - let cdn_network = match cdn_network_result { - Ok(cdn_network) => cdn_network, - Err(err) => { - panic!("error creating cdn network: {:?}", err); - } - }; - - let url_sender = node_validator_task_state.url_sender.clone(); - - let cdn_task_handle = async_std::task::spawn(async move { - let mut url_sender = url_sender; - - loop { - let messages_result = cdn_network.recv_msgs().await; - let messages = match messages_result { - Ok(message) => message, - Err(err) => { - tracing::error!("error receiving message: {:?}", err); - continue; - } - }; - - for message in messages { - // We want to try and decode this message. 
-            let message_deserialize_result = bincode::deserialize::<ExternalMessage>(&message);
-            let external_message = match message_deserialize_result {
-                Ok(external_message) => external_message,
-                Err(err) => {
-                    tracing::error!("error deserializing message: {:?}", err);
-                    continue;
-                }
-            };
-
-            match external_message {
-                ExternalMessage::RollCallResponse(roll_call_info) => {
-                    let public_api_url = roll_call_info.public_api_url;
+    let cdn_tasks = if let Some(cdn_broker_url_string) = options.cdn_marshal_endpoint() {
+        let (public_key, private_key) = PubKey::generated_from_seed_indexed([1; 32], 0);
+        let cdn_network_result = PushCdnNetwork::<SeqTypes>::new(
+            cdn_broker_url_string.to_string(),
+            vec![Topic::Global],
+            hotshot::traits::implementations::KeyPair {
+                public_key: WrappedSignatureKey(public_key),
+                private_key: private_key.clone(),
+            },
+            CdnMetricsValue::new(&PrometheusMetrics::default()),
+        );
+        let cdn_network = match cdn_network_result {
+            Ok(cdn_network) => cdn_network,
+            Err(err) => {
+                panic!("error creating cdn network: {:?}", err);
+            }
+        };

-                    // We have a public api url, so we can process this url.
+        let url_sender = node_validator_task_state.url_sender.clone();

-                    if let Err(err) = url_sender.send(public_api_url).await {
-                        tracing::error!("error sending public api url: {:?}", err);
-                    }
-                }
+        let broadcast_cdn_network = cdn_network.clone();
+        let cdn_receive_message_task = CdnReceiveMessagesTask::new(cdn_network, url_sender);
+        let broadcast_roll_call_task =
+            BroadcastRollCallTask::new(broadcast_cdn_network, public_key);

-                _ => {
-                    // We're not concerned about other message types
-                }
-            }
-        }
-    });
+        Some((broadcast_roll_call_task, cdn_receive_message_task))
+    } else {
+        None
+    };

     let port = options.port();
     // We would like to wait until being signaled
@@ -306,11 +303,13 @@ pub async fn run_standalone_service(options: Options) {
         tracing::info!("app serve result: {:?}", app_serve_result);
     });

-    tracing::info!("now listening on port {:?}", port);
-
     app_serve_handle.await;

-    drop(cdn_task_handle);
+    if let Some((broadcast_roll_call_task, cdn_receive_message_task)) = cdn_tasks {
+        drop(broadcast_roll_call_task);
+        drop(cdn_receive_message_task);
+    }
+
     drop(node_validator_task_state);
     drop(process_consume_leaves);
 }

From 64507cf1bbc145bb43f8afeead18cb6d39d9fbf9 Mon Sep 17 00:00:00 2001
From: Theodore Schnepper
Date: Wed, 31 Jul 2024 15:30:24 -0600
Subject: [PATCH 53/72] Add connected network task tests for node-validator

In order to verify the behavior of the ConnectedNetwork under various
assumptions, scenario-specific unit tests are desired to ensure that
the tasks behave as expected.
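As a sketch of the pattern used for these tests (names match the diff
below; the narrow trait carves out only the two network calls the task
needs, so a hand-written stub can stand in for PushCdnNetwork):

    // A hypothetical stub network that replays one canned batch of
    // already-serialized messages to the task under test.
    struct StubNetwork(Vec<Vec<u8>>);

    #[async_trait::async_trait]
    impl ConnectedNetworkConsumer<BLSPubKey> for StubNetwork {
        async fn wait_for_ready(&self) {}

        async fn recv_msgs(&self) -> Result<Vec<Vec<u8>>, NetworkError> {
            Ok(self.0.clone())
        }
    }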
Fix external event handler imports --- Cargo.lock | 1 + node-metrics/Cargo.toml | 1 + .../src/api/node_validator/v0/cdn/mod.rs | 417 +++++++++++++++++- node-metrics/src/lib.rs | 1 + sequencer/src/external_event_handler.rs | 44 +- 5 files changed, 453 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 964eac473..d94a858a7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6549,6 +6549,7 @@ version = "0.1.0" dependencies = [ "async-compatibility-layer", "async-std", + "async-trait", "bincode", "bitvec", "circular-buffer", diff --git a/node-metrics/Cargo.toml b/node-metrics/Cargo.toml index ec5206092..51afc0430 100644 --- a/node-metrics/Cargo.toml +++ b/node-metrics/Cargo.toml @@ -11,6 +11,7 @@ testing = ["serde_json", "espresso-types/testing"] [dependencies] async-compatibility-layer = { workspace = true } async-std = { workspace = true } +async-trait = { workspace = true } bincode = { workspace = true } bitvec = { workspace = true } circular-buffer = { workspace = true } diff --git a/node-metrics/src/api/node_validator/v0/cdn/mod.rs b/node-metrics/src/api/node_validator/v0/cdn/mod.rs index 2cc76c6ca..143170c79 100644 --- a/node-metrics/src/api/node_validator/v0/cdn/mod.rs +++ b/node-metrics/src/api/node_validator/v0/cdn/mod.rs @@ -2,13 +2,53 @@ use crate::api::node_validator::v0::create_node_validator_api::ExternalMessage; use async_std::task::JoinHandle; use espresso_types::{PubKey, SeqTypes}; use futures::{channel::mpsc::SendError, Sink, SinkExt}; -use hotshot::{traits::implementations::PushCdnNetwork, types::Message}; +use hotshot::{ + traits::NetworkError, + types::{Message, SignatureKey}, +}; use hotshot_types::{ message::{MessageKind, VersionedMessage}, - traits::network::{BroadcastDelay, ConnectedNetwork, Topic}, + traits::{ + network::{BroadcastDelay, ConnectedNetwork, Topic}, + node_implementation::NodeType, + }, }; use url::Url; +/// ConnectedNetworkConsumer represents a trait that splits up a portion of +/// the ConnectedNetwork trait, so that the consumer only needs to be aware of +/// the `wait_for_ready` and `recv_msgs` functions. +#[async_trait::async_trait] +pub trait ConnectedNetworkConsumer { + /// [wait_for_ready] will not return until the network is ready to be + /// utilized. + async fn wait_for_ready(&self); + + /// [recv_msgs] will return a list of messages that have been received from + /// the network. + /// + /// ## Errors + /// + /// All errors are expected to be network related. + async fn recv_msgs(&self) -> Result>, NetworkError>; +} + +#[async_trait::async_trait] +impl ConnectedNetworkConsumer for N +where + K: SignatureKey + Send + Sync + 'static, + N: ConnectedNetwork + 'static, +{ + async fn wait_for_ready(&self) { + >::wait_for_ready(self).await + } + + async fn recv_msgs(&self) -> Result>, NetworkError> { + let cloned_self = self.clone(); + >::recv_msgs(&cloned_self).await + } +} + /// CdnReceiveMessagesTask represents a task that is responsible for receiving /// messages from the CDN network and processing them. /// This task is primarily concerned with recording responses to RollCall @@ -22,8 +62,9 @@ impl CdnReceiveMessagesTask { /// URL sender. Calling this function will create an async task that /// will begin executing immediately. The handle for the task will /// be in the returned structure. 
- pub fn new(network: PushCdnNetwork, url_sender: K) -> Self + pub fn new(network: N, url_sender: K) -> Self where + N: ConnectedNetworkConsumer<::SignatureKey> + Send + 'static, K: Sink + Clone + Send + Unpin + 'static, { let task_handle = async_std::task::spawn(Self::process_cdn_messages(network, url_sender)); @@ -39,8 +80,9 @@ impl CdnReceiveMessagesTask { /// [MessageKind::External] messages, and attempts to decode /// [ExternalMessage] from those contained pieces of data. Though, in the /// future this may be able to be expanded to other things. - async fn process_cdn_messages(network: PushCdnNetwork, url_sender: K) + async fn process_cdn_messages(network: N, url_sender: K) where + N: ConnectedNetworkConsumer<::SignatureKey> + Send + 'static, K: Sink + Clone + Send + Unpin + 'static, { network.wait_for_ready().await; @@ -94,6 +136,7 @@ impl CdnReceiveMessagesTask { if let Err(err) = url_sender.send(public_api_url).await { tracing::error!("error sending public api url: {:?}", err); + return; } } @@ -114,6 +157,47 @@ impl Drop for CdnReceiveMessagesTask { } } +/// ConnectedNetworkPublisher represents a trait that splits up a portion of +/// the ConnectedNetwork trait, so that the consumer only needs to be aware of +/// the `wait_for_ready` and `broadcast_message` functions. +#[async_trait::async_trait] +pub trait ConnectedNetworkPublisher { + /// [wait_for_ready] will not return until the network is ready to be + /// utilized. + async fn wait_for_ready(&self); + + /// [broadcast_message] will broadcast the given message to some subset of + /// nodes in the network based on the given topic. + /// + /// This is a blocking operation. + async fn broadcast_message( + &self, + message: Vec, + topic: Topic, + broadcast_delay: BroadcastDelay, + ) -> Result<(), NetworkError>; +} + +#[async_trait::async_trait] +impl ConnectedNetworkPublisher for N +where + K: SignatureKey + Send + Sync + 'static, + N: ConnectedNetwork + 'static, +{ + async fn wait_for_ready(&self) { + >::wait_for_ready(self).await + } + + async fn broadcast_message( + &self, + message: Vec, + topic: Topic, + broadcast_delay: BroadcastDelay, + ) -> Result<(), NetworkError> { + >::broadcast_message(self, message, topic, broadcast_delay).await + } +} + /// BroadcastRollCallTask represents a task that is responsible for broadcasting /// a RollCallRequest to the CDN network. pub struct BroadcastRollCallTask { @@ -128,7 +212,10 @@ impl BroadcastRollCallTask { /// /// This task only performs one action, and then returns. It is not /// long-lived. - pub fn new(network: PushCdnNetwork, public_key: PubKey) -> Self { + pub fn new(network: N, public_key: PubKey) -> Self + where + N: ConnectedNetworkPublisher<::SignatureKey> + Send + 'static, + { let task_handle = async_std::task::spawn(Self::broadcast_roll_call(network, public_key)); Self { task_handle: Some(task_handle), @@ -139,7 +226,10 @@ impl BroadcastRollCallTask { /// RollCallRequest to the CDN network in order to request responses from /// the rest of the network participants, so we can collect the public API /// URLs in the message consuming task. 
- async fn broadcast_roll_call(network: PushCdnNetwork, public_key: PubKey) { + async fn broadcast_roll_call(network: N, public_key: PubKey) + where + N: ConnectedNetworkPublisher<::SignatureKey> + Send + 'static, + { network.wait_for_ready().await; // We want to send the Roll Call Request @@ -187,3 +277,318 @@ impl Drop for BroadcastRollCallTask { } } } + +#[cfg(test)] +mod test { + use super::{BroadcastRollCallTask, ConnectedNetworkConsumer, ConnectedNetworkPublisher}; + use crate::api::node_validator::v0::create_node_validator_api::ExternalMessage; + use crate::api::node_validator::v0::{ + cdn::CdnReceiveMessagesTask, create_node_validator_api::RollCallInfo, + }; + use async_std::future::TimeoutError; + use async_std::prelude::FutureExt; + use core::panic; + use espresso_types::SeqTypes; + use futures::channel::mpsc::Sender; + use futures::SinkExt; + use futures::{ + channel::mpsc::{self}, + StreamExt, + }; + use hotshot::types::SignatureKey; + use hotshot::{ + traits::NetworkError, + types::{BLSPubKey, Message}, + }; + use hotshot_types::message::{DataMessage, MessageKind, VersionedMessage}; + use hotshot_types::traits::network::{BroadcastDelay, ResponseMessage}; + use std::time::Duration; + use url::Url; + + /// [TestConnectedNetworkConsumer] is a test implementation of the + /// [ConnectedNetworkConsumer] trait that allows for the simulation of + /// network messages being received. + struct TestConnectedNetworkConsumer(Result>, NetworkError>); + + /// [clone_result] is a helper function that clones the result of a + /// network message receive operation. This is used to ensure that the + /// original result is not consumed by the task. + fn clone_result( + result: &Result>, NetworkError>, + ) -> Result>, NetworkError> { + match result { + Ok(messages) => Ok(messages.clone()), + Err(err) => match err { + NetworkError::ChannelSend => Err(NetworkError::ChannelSend), + _ => panic!("unexpected network error"), + }, + } + } + + #[async_trait::async_trait] + impl ConnectedNetworkConsumer for TestConnectedNetworkConsumer { + async fn wait_for_ready(&self) {} + + async fn recv_msgs(&self) -> Result>, NetworkError> { + async_std::task::sleep(Duration::from_millis(5)).await; + clone_result(&self.0) + } + } + + /// [test_cdn_receive_messages_task] is a test that verifies that the + /// an expected External Message can be encoded, decoded, and sent to the + /// url_sender appropriately. 
+ #[async_std::test] + async fn test_cdn_receive_messages_task() { + let test_hotshot_message_serialized = { + let test_url = Url::parse("http://localhost:8080/").unwrap(); + + let test_external_message = ExternalMessage::RollCallResponse(RollCallInfo { + public_api_url: test_url.clone(), + }); + + let external_message_encoded = bincode::serialize(&test_external_message).unwrap(); + + let test_message = Message:: { + sender: BLSPubKey::generated_from_seed_indexed([0; 32], 0).0, + kind: MessageKind::External(external_message_encoded), + }; + + hotshot_types::message::VersionedMessage::serialize(&test_message, &None).unwrap() + }; + + let (url_sender, url_receiver) = mpsc::channel(1); + let task = CdnReceiveMessagesTask::new( + TestConnectedNetworkConsumer(Ok(vec![test_hotshot_message_serialized])), + url_sender, + ); + + let mut url_receiver = url_receiver; + let next_message = url_receiver + .next() + .timeout(Duration::from_millis(50)) + .await + .unwrap() + .unwrap(); + + assert_eq!(next_message, Url::parse("http://localhost:8080/").unwrap()); + + drop(task); + } + + /// [test_cdn_receive_messages_task_fails_receiving_message] is a test that + /// verifies that the task does not close, nor send a url, when it + /// encounters an error from the recv_msgs function. + #[async_std::test] + async fn test_cdn_receive_messages_task_fails_receiving_message() { + let (url_sender, url_receiver) = mpsc::channel(1); + let task = CdnReceiveMessagesTask::new( + TestConnectedNetworkConsumer(Err(NetworkError::ChannelSend)), + url_sender, + ); + + let mut url_receiver = url_receiver; + // The task should not panic when it fails to receive a message. + let receive_result = url_receiver.next().timeout(Duration::from_millis(50)).await; + + if let Err(TimeoutError { .. }) = receive_result { + // This is expected + } else { + panic!("receive did not timeout"); + } + + drop(task); + } + + /// [test_cdn_receive_messages_task_fails_decoding_hotshot_message] is a + /// test that verifies that the task does not close, nor send a url, when it + /// encounters an error from the deserialization of the hotshot message. + #[async_std::test] + async fn test_cdn_receive_messages_task_fails_decoding_hotshot_message() { + let (url_sender, url_receiver) = mpsc::channel(1); + let task = CdnReceiveMessagesTask::new( + TestConnectedNetworkConsumer(Ok(vec![vec![0]])), + url_sender, + ); + + let mut url_receiver = url_receiver; + // The task should not panic when it fails to receive a message. + let receive_result = url_receiver.next().timeout(Duration::from_millis(50)).await; + + if let Err(TimeoutError { .. }) = receive_result { + // This is expected + } else { + panic!("receive did not timeout"); + } + + drop(task); + } + + /// [test_cdn_receive_messages_task_fails_unexpected_hotshot_message_variant] + /// is a test that verifies that the task does not close, nor send a url, when + /// it encounters a hotshot message that was not an External message. + /// + /// This really shouldn't happen in practice. 
+ #[async_std::test] + async fn test_cdn_receive_messages_task_fails_unexpected_hotshot_message_variant() { + let (url_sender, url_receiver) = mpsc::channel(1); + let bytes = VersionedMessage::serialize( + &Message:: { + sender: BLSPubKey::generated_from_seed_indexed([0; 32], 0).0, + kind: MessageKind::Data(DataMessage::DataResponse(ResponseMessage::NotFound)), + }, + &None, + ) + .unwrap(); + + let task = + CdnReceiveMessagesTask::new(TestConnectedNetworkConsumer(Ok(vec![bytes])), url_sender); + + let mut url_receiver = url_receiver; + // The task should not panic when it fails to receive a message. + let receive_result = url_receiver.next().timeout(Duration::from_millis(50)).await; + + if let Err(TimeoutError { .. }) = receive_result { + // This is expected + } else { + panic!("receive did not timeout"); + } + + drop(task); + } + + /// [test_cdn_receive_messages_task_fails_decoding_external_message] is a + /// test that verifies that the task does not close, nor send a url, when + /// it encounters an error from the deserialization of the external message. + #[async_std::test] + async fn test_cdn_receive_messages_task_fails_decoding_external_message() { + let (url_sender, url_receiver) = mpsc::channel(1); + let bytes = VersionedMessage::serialize( + &Message:: { + sender: BLSPubKey::generated_from_seed_indexed([0; 32], 0).0, + kind: MessageKind::External(vec![0]), + }, + &None, + ) + .unwrap(); + + let task = + CdnReceiveMessagesTask::new(TestConnectedNetworkConsumer(Ok(vec![bytes])), url_sender); + + let mut url_receiver = url_receiver; + // The task should not panic when it fails to receive a message. + let receive_result = url_receiver.next().timeout(Duration::from_millis(50)).await; + + if let Err(TimeoutError { .. }) = receive_result { + // This is expected + } else { + panic!("receive did not timeout"); + } + + drop(task); + } + + /// [test_cdn_receive_messages_tasks_exits_when_url_receiver_closed] is a + /// test that verifies that the task exits when the url receiver is closed. + /// + /// Without being able to send urls to the url_sender, the task doesn't + /// really have a point in existing. + #[async_std::test] + async fn test_cdn_receive_messages_tasks_exits_when_url_receiver_closed() { + let (url_sender, url_receiver) = mpsc::channel(1); + + let test_hotshot_message_serialized = { + let test_url = Url::parse("http://localhost:8080/").unwrap(); + + let test_external_message = ExternalMessage::RollCallResponse(RollCallInfo { + public_api_url: test_url.clone(), + }); + + let external_message_encoded = bincode::serialize(&test_external_message).unwrap(); + + let test_message = Message:: { + sender: BLSPubKey::generated_from_seed_indexed([0; 32], 0).0, + kind: MessageKind::External(external_message_encoded), + }; + + hotshot_types::message::VersionedMessage::serialize(&test_message, &None).unwrap() + }; + drop(url_receiver); + + let mut task = CdnReceiveMessagesTask::new( + TestConnectedNetworkConsumer(Ok(vec![test_hotshot_message_serialized])), + url_sender.clone(), + ); + + let task_handle = task.task_handle.take(); + + if let Some(task_handle) = task_handle { + assert_eq!(task_handle.timeout(Duration::from_millis(50)).await, Ok(())); + } + } + + /// [TestConnectedNetworkPublisher] is a test implementation of the + /// [ConnectedNetworkPublisher] trait that allows for the simulation of + /// network messages being sent. 
+ struct TestConnectedNetworkPublisher(Sender>); + + #[async_trait::async_trait] + impl ConnectedNetworkPublisher for TestConnectedNetworkPublisher { + async fn wait_for_ready(&self) {} + + async fn broadcast_message( + &self, + message: Vec, + _topic: hotshot_types::traits::network::Topic, + _broadcast_delay: BroadcastDelay, + ) -> Result<(), NetworkError> { + let mut sender = self.0.clone(); + let send_result = sender.send(message).await; + send_result.map_err(|_| NetworkError::ChannelSend) + } + } + + /// [test_cdn_broadcast_roll_call_task] is a test that verifies that the + /// task broadcasts a RollCallRequest message to the network. It also + /// verifies that the task is short-lived, as it does not need to persist + /// beyond it's initial request. + #[async_std::test] + async fn test_cdn_broadcast_roll_call_task() { + let (message_sender, message_receiver) = mpsc::channel(1); + + let task = BroadcastRollCallTask::new( + TestConnectedNetworkPublisher(message_sender), + BLSPubKey::generated_from_seed_indexed([0; 32], 0).0, + ); + + let mut message_receiver = message_receiver; + let next_message = message_receiver.next().await.unwrap(); + let next_message = + as VersionedMessage>::deserialize(&next_message, &None) + .unwrap(); + + let external_message = match next_message.kind { + MessageKind::External(external_message) => external_message, + _ => panic!("unexpected message kind"), + }; + + let external_message = bincode::deserialize::(&external_message).unwrap(); + + match external_message { + ExternalMessage::RollCallRequest(public_key) => { + assert_eq!( + public_key, + BLSPubKey::generated_from_seed_indexed([0; 32], 0).0 + ); + } + _ => panic!("unexpected external message"), + } + + let mut task = task; + let task_handle = task.task_handle.take(); + + if let Some(task_handle) = task_handle { + assert_eq!(task_handle.timeout(Duration::from_millis(50)).await, Ok(())); + } + } +} diff --git a/node-metrics/src/lib.rs b/node-metrics/src/lib.rs index b49cbb910..98735c310 100644 --- a/node-metrics/src/lib.rs +++ b/node-metrics/src/lib.rs @@ -84,6 +84,7 @@ //! consume, and which data can be derived for these purposes. //! //! What Data Streams do we need to provide to clients? +//! //! 1. Node Information //! a. Node Identity Information //! 
- Should be able to be sent in an initial batch diff --git a/sequencer/src/external_event_handler.rs b/sequencer/src/external_event_handler.rs index 3d40e63ac..43e2bb137 100644 --- a/sequencer/src/external_event_handler.rs +++ b/sequencer/src/external_event_handler.rs @@ -3,15 +3,21 @@ use anyhow::{Context, Result}; use async_compatibility_layer::channel::{Receiver, Sender}; use async_std::task::{self, JoinHandle}; -use espresso_types::PubKey; -use hotshot::types::BLSPubKey; -use hotshot_types::traits::network::{BroadcastDelay, ConnectedNetwork, Topic}; +use espresso_types::{PubKey, SeqTypes}; +use hotshot::types::{BLSPubKey, Message, SignatureKey}; +use hotshot_types::{ + message::{MessageKind, VersionedMessage}, + traits::{ + network::{BroadcastDelay, ConnectedNetwork, Topic}, + node_implementation::NodeType, + }, +}; use serde::{Deserialize, Serialize}; use std::sync::Arc; use url::Url; /// An external message that can be sent to or received from a node -#[derive(Serialize, Deserialize, Clone)] +#[derive(Debug, Serialize, Deserialize, Clone)] pub enum ExternalMessage { /// A request for a node to respond with its identifier /// Contains the public key of the node that is requesting the roll call @@ -23,7 +29,7 @@ pub enum ExternalMessage { } /// Information about a node that is used in a roll call response -#[derive(Serialize, Deserialize, Clone)] +#[derive(Debug, Serialize, Deserialize, Clone)] pub struct RollCallInfo { // The public API URL of the node pub public_api_url: Option, @@ -42,6 +48,7 @@ pub struct ExternalEventHandler { } // The different types of outbound messages (broadcast or direct) +#[derive(Debug)] pub enum OutboundMessage { Direct(Vec, PubKey), Broadcast(Vec), @@ -68,6 +75,21 @@ impl ExternalEventHandler { let roll_call_message = ExternalMessage::RollCallResponse(roll_call_info.clone()); let roll_call_message_bytes = bincode::serialize(&roll_call_message) .with_context(|| "Failed to serialize roll call message for initial broadcast")?; + + let message = Message:: { + sender: ::SignatureKey::generated_from_seed_indexed( + [0; 32], 0, + ) + .0, + kind: MessageKind::External(roll_call_message_bytes), + }; + + let roll_call_message_bytes = + as VersionedMessage>::serialize(&message, &None) + .with_context(|| { + "Failed to serialize roll call message for initial broadcast" + })?; + outbound_message_sender .try_send(OutboundMessage::Broadcast(roll_call_message_bytes)) .with_context(|| "External outbound message queue is somehow full")?; @@ -104,6 +126,18 @@ impl ExternalEventHandler { let response_bytes = bincode::serialize(&response) .with_context(|| "Failed to serialize roll call response")?; + let message = Message:: { + sender: ::SignatureKey::generated_from_seed_indexed( + [0; 32], 0, + ) + .0, + kind: MessageKind::::External(response_bytes), + }; + + let response_bytes = + as VersionedMessage>::serialize(&message, &None) + .with_context(|| "Failed to serialize roll call response")?; + // Send the response self.outbound_message_sender .try_send(OutboundMessage::Direct(response_bytes, pub_key)) From f07c65dc7672d141d1f84126d46dc6692a8aed1f Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Thu, 1 Aug 2024 09:46:04 -0600 Subject: [PATCH 54/72] Add Node validator api port to env file --- .env | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.env b/.env index e772ca023..4671ff9f0 100644 --- a/.env +++ b/.env @@ -37,6 +37,8 @@ ESPRESSO_SEQUENCER_GENESIS_FILE=/genesis/demo.toml ESPRESSO_SEQUENCER_L1_PORT=8545 ESPRESSO_SEQUENCER_L1_WS_PORT=8546 
 ESPRESSO_SEQUENCER_L1_PROVIDER=http://demo-l1-network:${ESPRESSO_SEQUENCER_L1_PORT}
+ESPRESSO_NODE_VALIDATOR_PORT=9000
+
 # Only allow 1 block to be processed for events at a time, simulating a very bad L1 provider.
 ESPRESSO_SEQUENCER_L1_EVENTS_MAX_BLOCK_RANGE=1
 ESPRESSO_SEQUENCER_ETH_MNEMONIC="test test test test test test test test test test test junk"

From ecc922fa37f9270f545466fe0d768240b764b253 Mon Sep 17 00:00:00 2001
From: Theodore Schnepper
Date: Thu, 1 Aug 2024 09:46:58 -0600
Subject: [PATCH 55/72] Add node_validator to process compose

---
 process-compose.yaml | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/process-compose.yaml b/process-compose.yaml
index 2ef20101c..2aa28e909 100644
--- a/process-compose.yaml
+++ b/process-compose.yaml
@@ -333,6 +333,36 @@ processes:
         path: /healthcheck
       failure_threshold: 100

+  node_validator:
+    command: node-metrics --
+    environment:
+      - ESPRESSO_NODE_VALIDATOR_STAKE_TABLE_SOURCE_BASE_URL=http://localhost:$ESPRESSO_SEQUENCER_API_PORT
+      - ESPRESSO_NODE_VALIDATOR_LEAF_STREAM_SOURCE_BASE_URL=http://localhost:$ESPRESSO_SEQUENCER_API_PORT
+      - ESPRESSO_NODE_VALIDATOR_INITIAL_NODE_PUBLIC_BASE_URLS=http://localhost:$ESPRESSO_SEQUENCER_API_PORT,http://localhost:$ESPRESSO_SEQUENCER1_API_PORT,http://localhost:$ESPRESSO_SEQUENCER2_API_PORT,http://localhost:$ESPRESSO_SEQUENCER3_API_PORT,http://localhost:$ESPRESSO_SEQUENCER4_API_PORT
+    depends_on:
+      broker_0:
+        condition: process_healthy
+      broker_1:
+        condition: process_healthy
+      sequencer0:
+        condition: process_healthy
+      sequencer1:
+        condition: process_healthy
+      sequencer2:
+        condition: process_healthy
+      sequencer3:
+        condition: process_healthy
+      sequencer4:
+        condition: process_healthy
+    readiness_probe:
+      http_get:
+        scheme: http
+        host: localhost
+        port: $ESPRESSO_NODE_VALIDATOR_PORT
+        path: /healthcheck
+      failure_threshold: 100
+
+
 # We use KeyDB (a Redis variant) to maintain consistency between
 # different parts of the CDN
 # Cheating a bit here too, but KeyDB is not available as a Nix package.
From cb132dde56b03c9f0ef848443c78a625b47d7cea Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Thu, 1 Aug 2024 10:37:55 -0600 Subject: [PATCH 56/72] Add node-validator to docker scripts --- .github/workflows/build.yml | 21 +++++++++++++++++++++ .github/workflows/build_static.yml | 20 +++++++++++++++++++- docker-compose.yaml | 20 ++++++++++++++++++++ docker/node-validator.Dockerfile | 18 ++++++++++++++++++ scripts/build-docker-images | 3 ++- scripts/build-docker-images-native | 1 + 6 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 docker/node-validator.Dockerfile diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e4fcca432..22debaa26 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -81,6 +81,7 @@ jobs: target/release/pub-key target/release/espresso-bridge target/release/marketplace-solver + target/release/node-metrics build-arm: runs-on: buildjet-4vcpu-ubuntu-2204-arm @@ -133,6 +134,7 @@ jobs: target/release/pub-key target/release/espresso-bridge target/release/marketplace-solver + target/release/node-metrics build-dockers: runs-on: ubuntu-latest @@ -153,6 +155,7 @@ jobs: espresso-dev-node-tag: ${{ steps.espresso-dev-node.outputs.tags }} bridge-tag: ${{ steps.bridge.outputs.tags }} marketplace-solver-tag: ${{ steps.marketplace-solver.outputs.tags }} + node-validator-tag: ${{ steps.node-validator.outputs.tags }} steps: - name: Checkout Repository uses: actions/checkout@v4 @@ -272,6 +275,12 @@ jobs: with: images: ghcr.io/espressosystems/espresso-sequencer/marketplace-solver + - name: Generate node-validator metadata + uses: docker/metadata-action@v5 + id: node-validator + with: + images: ghcr.io/espressosystems/espresso-sequencer/node-validator + - name: Build and push sequencer docker uses: docker/build-push-action@v6 with: @@ -421,6 +430,16 @@ jobs: tags: ${{ steps.marketplace-solver.outputs.tags }} labels: ${{ steps.marketplace-solver.outputs.labels }} + - name: Build and push node-validator docker + uses: docker/build-push-action@v6 + with: + context: ./ + file: ./docker/node-validator.Dockerfile + platforms: linux/amd64,linux/arm64 + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.node-validator.outputs.tags }} + labels: ${{ steps.node-validator.outputs.labels }} + test-demo: if: ${{ github.event_name != 'pull_request' }} runs-on: ubuntu-latest @@ -450,6 +469,7 @@ jobs: docker pull ${{ needs.build-dockers.outputs.nasty-client-tag }} docker pull ${{ needs.build-dockers.outputs.bridge-tag }} docker pull ${{ needs.build-dockers.outputs.marketplace-solver-tag }} + docker pull ${{ needs.build-dockers.outputs.node-validator-tag }} - name: Tag new docker images run: | @@ -467,6 +487,7 @@ jobs: docker tag ${{ needs.build-dockers.outputs.nasty-client-tag }} ghcr.io/espressosystems/espresso-sequencer/nasty-client:main docker tag ${{ needs.build-dockers.outputs.bridge-tag }} ghcr.io/espressosystems/espresso-sequencer/bridge:main docker tag ${{ needs.build-dockers.outputs.marketplace-solver-tag }} ghcr.io/espressosystems/espresso-sequencer/marketplace-solver:main + docker tag ${{ needs.build-dockers.outputs.node-validator-tag }} ghcr.io/espressosystems/espresso-sequencer/node-validator:main - name: Test docker demo run: | diff --git a/.github/workflows/build_static.yml b/.github/workflows/build_static.yml index 73187114b..8edfb1550 100644 --- a/.github/workflows/build_static.yml +++ b/.github/workflows/build_static.yml @@ -92,6 +92,7 @@ jobs: ${{ env.CARGO_TARGET_DIR }}/${{ env.TARGET_TRIPLET 
}}/release/espresso-bridge ${{ env.CARGO_TARGET_DIR }}/${{ env.TARGET_TRIPLET }}/release/espresso-dev-node ${{ env.CARGO_TARGET_DIR }}/${{ env.TARGET_TRIPLET }}/release/marketplace-solver + ${{ env.CARGO_TARGET_DIR }}/${{ env.TARGET_TRIPLET }}/release/node-metrics static-dockers: runs-on: ubuntu-latest @@ -216,6 +217,13 @@ jobs: images: ghcr.io/espressosystems/espresso-sequencer/marketplace-solver flavor: suffix=musl + - name: Generate node-validator metadata + uses: docker/metadata-action@v5 + id: node-validator + with: + images: ghcr.io/espressosystems/espresso-sequencer/node-validator + flavor: suffix=musl + - name: Build and push sequencer docker uses: docker/build-push-action@v6 with: @@ -344,4 +352,14 @@ jobs: platforms: linux/amd64,linux/arm64 push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.marketplace-solver.outputs.tags }} - labels: ${{ steps.marketplace-solver.outputs.labels }} \ No newline at end of file + labels: ${{ steps.marketplace-solver.outputs.labels }} + + - name: Build and push node-validator docker + uses: docker/build-push-action@v6 + with: + context: ./ + file: ./docker/node-validator.Dockerfile + platforms: linux/amd64,linux/arm64 + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.node-validator.outputs.tags }} + labels: ${{ steps.node-validator.outputs.labels }} \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml index a5bc05033..f9c0395b3 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -593,6 +593,26 @@ services: solver-db: condition: service_healthy + node-validator: + image: ghcr.io/espressosystems/espresso-sequencer/node-validator:main + ports: + - "$ESPRESSO_NODE_VALIDATOR_PORT:$ESPRESSO_NODE_VALIDATOR_PORT" + environment: + - ESPRESSO_NODE_VALIDATOR_STAKE_TABLE_SOURCE_BASE_URL=http://sequencer0:$ESPRESSO_SEQUENCER_API_PORT + - ESPRESSO_NODE_VALIDATOR_LEAF_STREAM_SOURCE_BASE_URL=http://sequencer0:$ESPRESSO_SEQUENCER_API_PORT + - ESPRESSO_NODE_VALIDATOR_INITIAL_NODE_PUBLIC_BASE_URLS=http://sequencer0:$ESPRESSO_SEQUENCER_API_PORT,http://sequencer1:$ESPRESSO_SEQUENCER1_API_PORT,http://sequencer2:$ESPRESSO_SEQUENCER2_API_PORT,http://sequencer3:$ESPRESSO_SEQUENCER3_API_PORT,http://sequencer4:$ESPRESSO_SEQUENCER4_API_PORT + depends_on: + sequencer0: + condition: process_healthy + sequencer1: + condition: process_healthy + sequencer2: + condition: process_healthy + sequencer3: + condition: process_healthy + sequencer4: + condition: process_healthy + sequencer-db-0: image: postgres user: root diff --git a/docker/node-validator.Dockerfile b/docker/node-validator.Dockerfile new file mode 100644 index 000000000..5a7690f65 --- /dev/null +++ b/docker/node-validator.Dockerfile @@ -0,0 +1,18 @@ +FROM ubuntu:jammy + +ARG TARGETARCH + +RUN apt-get update \ + && apt-get install -y curl libcurl4 wait-for-it tini \ + && rm -rf /var/lib/apt/lists/* +ENTRYPOINT ["tini", "--"] + +COPY target/$TARGETARCH/release/node-metrics /bin/node-metrics +RUN chmod +x /bin/node-metrics + +# Run a web server on this port by default. Port can be overridden by the container orchestrator. 
+ENV ESPRESSO_NODE_VALIDATOR_PORT=80 + +CMD [ "/bin/node-metrics"] +HEALTHCHECK --interval=1s --timeout=1s --retries=100 CMD curl --fail http://localhost:${ESPRESSO_NODE_VALIDATOR_PORT}/healthcheck || exit 1 +EXPOSE ${ESPRESSO_NODE_VALIDATOR_PORT} diff --git a/scripts/build-docker-images b/scripts/build-docker-images index 072494efb..806621c46 100755 --- a/scripts/build-docker-images +++ b/scripts/build-docker-images @@ -62,4 +62,5 @@ docker build -t ghcr.io/espressosystems/espresso-sequencer/builder:main -f docke docker build -t ghcr.io/espressosystems/espresso-sequencer/nasty-client:main -f docker/nasty-client.Dockerfile ${WORKDIR} docker build -t ghcr.io/espressosystems/espresso-sequencer/espresso-dev-node:main -f docker/espresso-dev-node.Dockerfile ${WORKDIR} docker build -t ghcr.io/espressosystems/espresso-sequencer/bridge:main -f docker/espresso-bridge.Dockerfile ${WORKDIR} -docker build -t ghcr.io/espressosystems/espresso-sequencer/marketplace-solver:main -f docker/marketplace-solver.Dockerfile ${WORKDIR} \ No newline at end of file +docker build -t ghcr.io/espressosystems/espresso-sequencer/marketplace-solver:main -f docker/marketplace-solver.Dockerfile ${WORKDIR} +docker build -t ghcr.io/espressosystems/espresso-sequencer/node-validator:main -f docker/node-validator.Dockerfile ${WORKDIR} \ No newline at end of file diff --git a/scripts/build-docker-images-native b/scripts/build-docker-images-native index 6037ce137..ed409a68f 100755 --- a/scripts/build-docker-images-native +++ b/scripts/build-docker-images-native @@ -118,3 +118,4 @@ docker build --platform $PLATFORM -t ghcr.io/espressosystems/espresso-sequencer/ docker build --platform $PLATFORM -t ghcr.io/espressosystems/espresso-sequencer/espresso-dev-node:main -f docker/espresso-dev-node.Dockerfile ${WORKDIR} docker build --platform $PLATFORM -t ghcr.io/espressosystems/espresso-sequencer/bridge:main -f docker/espresso-bridge.Dockerfile ${WORKDIR} docker build --platform $PLATFORM -t ghcr.io/espressosystems/espresso-sequencer/marketplace-solver:main -f docker/marketplace-solver.Dockerfile ${WORKDIR} +docker build --platform $PLATFORM -t ghcr.io/espressosystems/espresso-sequencer/node-validator:main -f docker/node-validator.Dockerfile ${WORKDIR} From a32198269ebe44dbcb5b1da264fb13a76247c3ab Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Thu, 1 Aug 2024 14:42:49 -0600 Subject: [PATCH 57/72] Refactor simplify InternalClientMessage The `InternalClientMessage` enum largely follows the `ClientMessage`, and as such it has many duplicate cases. As pointed out by @jbearer in this comment: https://github.com/EspressoSystems/espresso-sequencer/pull/1771#discussion_r1700458919. It makes more sense to wrap the `ClientMessage`s instead of having duplicate cases for them. This also has the added benefit of cleaning up much of the boilerplate in the implementation. Additionally, this relocates the `PartialEq` implementation into the test module so that it is only defined for unit tests.
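To illustrate the shape of the change, here is a minimal sketch of the wrapped-request pattern. This is a simplified stand-in, not the exact definitions in the diff below: the real `ClientId` is an opaque counter type and the real sender handle is a futures channel.

```rust
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ClientMessage {
    SubscribeLatestBlock,
    RequestBlocksSnapshot,
    // ...remaining subscribe/request cases elided
}

/// Assumption: stand-in for the real opaque ClientId counter type.
pub type ClientId = u64;

pub enum InternalClientMessage<K> {
    // K is the per-client sender handle; senders cannot be compared, which
    // is why the PartialEq impl moves into the test module.
    Connected(K),
    Disconnected(ClientId),
    // One variant now carries every client request alongside its ClientId.
    Request(ClientId, ClientMessage),
}

impl ClientMessage {
    /// The old per-variant match collapses into a single constructor call.
    pub fn to_internal_with_client_id<K>(&self, client_id: ClientId) -> InternalClientMessage<K> {
        InternalClientMessage::Request(client_id, *self)
    }
}
```

Deriving `Copy` on `ClientMessage` is what makes the one-line conversion possible, since the request payload can be moved into the wrapper by value.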
--- .../src/service/client_message/mod.rs | 146 ++++++++---------- node-metrics/src/service/client_state/mod.rs | 63 +++++--- 2 files changed, 109 insertions(+), 100 deletions(-) diff --git a/node-metrics/src/service/client_message/mod.rs b/node-metrics/src/service/client_message/mod.rs index ad8e720af..d19881430 100644 --- a/node-metrics/src/service/client_message/mod.rs +++ b/node-metrics/src/service/client_message/mod.rs @@ -4,7 +4,7 @@ use serde::{Deserialize, Serialize}; /// [ClientMessage] represents the messages that the client can send to the /// server for a request. /// -#[derive(Debug, PartialEq, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)] pub enum ClientMessage { SubscribeLatestBlock, SubscribeNodeIdentity, @@ -25,64 +25,14 @@ pub enum InternalClientMessage { Connected(K), Disconnected(ClientId), - SubscribeLatestBlock(ClientId), - SubscribeNodeIdentity(ClientId), - SubscribeVoters(ClientId), - - RequestBlocksSnapshot(ClientId), - RequestNodeIdentitySnapshot(ClientId), - RequestHistogramSnapshot(ClientId), - RequestVotersSnapshot(ClientId), -} - -impl PartialEq for InternalClientMessage { - fn eq(&self, other: &Self) -> bool { - match (self, other) { - // We don't care about the [Sender] here, as it is unable to be - // compared. - (Self::Connected(_), Self::Connected(_)) => true, - (Self::Disconnected(lhs), Self::Disconnected(rhs)) => lhs == rhs, - (Self::SubscribeLatestBlock(lhs), Self::SubscribeLatestBlock(rhs)) => lhs == rhs, - (Self::SubscribeNodeIdentity(lhs), Self::SubscribeNodeIdentity(rhs)) => lhs == rhs, - (Self::SubscribeVoters(lhs), Self::SubscribeVoters(rhs)) => lhs == rhs, - (Self::RequestBlocksSnapshot(lhs), Self::RequestBlocksSnapshot(rhs)) => lhs == rhs, - (Self::RequestNodeIdentitySnapshot(lhs), Self::RequestNodeIdentitySnapshot(rhs)) => { - lhs == rhs - } - (Self::RequestHistogramSnapshot(lhs), Self::RequestHistogramSnapshot(rhs)) => { - lhs == rhs - } - (Self::RequestVotersSnapshot(lhs), Self::RequestVotersSnapshot(rhs)) => lhs == rhs, - _ => false, - } - } + Request(ClientId, ClientMessage), } impl ClientMessage { /// [to_internal_with_client_id] converts the [ClientMessage] into an /// [InternalClientMessage] with the given [ClientId]. pub fn to_internal_with_client_id(&self, client_id: ClientId) -> InternalClientMessage { - match self { - ClientMessage::SubscribeLatestBlock => { - InternalClientMessage::SubscribeLatestBlock(client_id) - } - ClientMessage::SubscribeNodeIdentity => { - InternalClientMessage::SubscribeNodeIdentity(client_id) - } - ClientMessage::SubscribeVoters => InternalClientMessage::SubscribeVoters(client_id), - ClientMessage::RequestBlocksSnapshot => { - InternalClientMessage::RequestBlocksSnapshot(client_id) - } - ClientMessage::RequestNodeIdentitySnapshot => { - InternalClientMessage::RequestNodeIdentitySnapshot(client_id) - } - ClientMessage::RequestHistogramSnapshot => { - InternalClientMessage::RequestHistogramSnapshot(client_id) - } - ClientMessage::RequestVotersSnapshot => { - InternalClientMessage::RequestVotersSnapshot(client_id) - } - } + InternalClientMessage::Request(client_id, *self) } } @@ -94,6 +44,22 @@ mod tests { use futures::channel::mpsc::Sender; use std::iter::zip; + impl PartialEq for InternalClientMessage { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + // We don't care about the [Sender] here, as it is unable to be + // compared. 
+ (Self::Connected(_), Self::Connected(_)) => true, + (Self::Disconnected(lhs), Self::Disconnected(rhs)) => lhs == rhs, + ( + Self::Request(lhs_client_id, lhs_message), + Self::Request(rhs_client_id, rhs_message), + ) => lhs_client_id == rhs_client_id && lhs_message == rhs_message, + _ => false, + } + } + } + #[test] fn test_client_message_partial_eq() { let messages = [ @@ -173,22 +139,7 @@ mod tests { let internal_client_message = message.to_internal_with_client_id::>(client_id); match internal_client_message { - InternalClientMessage::SubscribeLatestBlock(id) => { - assert_eq!(id, client_id); - } - InternalClientMessage::SubscribeNodeIdentity(id) => { - assert_eq!(id, client_id); - } - InternalClientMessage::SubscribeVoters(id) => { - assert_eq!(id, client_id); - } - InternalClientMessage::RequestBlocksSnapshot(id) => { - assert_eq!(id, client_id); - } - InternalClientMessage::RequestNodeIdentitySnapshot(id) => { - assert_eq!(id, client_id); - } - InternalClientMessage::RequestHistogramSnapshot(id) => { + InternalClientMessage::Request(id, _) => { assert_eq!(id, client_id); } _ => panic!("Unexpected InternalClientMessage"), @@ -203,12 +154,27 @@ mod tests { let messages = [ InternalClientMessage::Connected(sender), InternalClientMessage::Disconnected(ClientId::from_count(1)), - InternalClientMessage::SubscribeLatestBlock(ClientId::from_count(1)), - InternalClientMessage::SubscribeNodeIdentity(ClientId::from_count(1)), - InternalClientMessage::SubscribeVoters(ClientId::from_count(1)), - InternalClientMessage::RequestBlocksSnapshot(ClientId::from_count(1)), - InternalClientMessage::RequestNodeIdentitySnapshot(ClientId::from_count(1)), - InternalClientMessage::RequestHistogramSnapshot(ClientId::from_count(1)), + InternalClientMessage::Request( + ClientId::from_count(1), + ClientMessage::SubscribeLatestBlock, + ), + InternalClientMessage::Request( + ClientId::from_count(1), + ClientMessage::SubscribeNodeIdentity, + ), + InternalClientMessage::Request(ClientId::from_count(1), ClientMessage::SubscribeVoters), + InternalClientMessage::Request( + ClientId::from_count(1), + ClientMessage::RequestBlocksSnapshot, + ), + InternalClientMessage::Request( + ClientId::from_count(1), + ClientMessage::RequestNodeIdentitySnapshot, + ), + InternalClientMessage::Request( + ClientId::from_count(1), + ClientMessage::RequestHistogramSnapshot, + ), ]; for (l, r) in zip(messages.iter(), messages.iter()) { @@ -227,12 +193,30 @@ mod tests { for j in 2..12 { let iter_messages = [ InternalClientMessage::Disconnected(ClientId::from_count(j)), - InternalClientMessage::SubscribeLatestBlock(ClientId::from_count(j)), - InternalClientMessage::SubscribeNodeIdentity(ClientId::from_count(j)), - InternalClientMessage::SubscribeVoters(ClientId::from_count(j)), - InternalClientMessage::RequestBlocksSnapshot(ClientId::from_count(j)), - InternalClientMessage::RequestNodeIdentitySnapshot(ClientId::from_count(j)), - InternalClientMessage::RequestHistogramSnapshot(ClientId::from_count(j)), + InternalClientMessage::Request( + ClientId::from_count(j), + ClientMessage::SubscribeLatestBlock, + ), + InternalClientMessage::Request( + ClientId::from_count(j), + ClientMessage::SubscribeNodeIdentity, + ), + InternalClientMessage::Request( + ClientId::from_count(j), + ClientMessage::SubscribeVoters, + ), + InternalClientMessage::Request( + ClientId::from_count(j), + ClientMessage::RequestBlocksSnapshot, + ), + InternalClientMessage::Request( + ClientId::from_count(j), + ClientMessage::RequestNodeIdentitySnapshot, + ), + 
InternalClientMessage::Request( + ClientId::from_count(j), + ClientMessage::RequestHistogramSnapshot, + ), ]; // We skip the first message, as we don't want to include the diff --git a/node-metrics/src/service/client_state/mod.rs b/node-metrics/src/service/client_state/mod.rs index e34e89bdf..feb88bc98 100644 --- a/node-metrics/src/service/client_state/mod.rs +++ b/node-metrics/src/service/client_state/mod.rs @@ -1,6 +1,6 @@ use super::{ client_id::ClientId, - client_message::InternalClientMessage, + client_message::{ClientMessage, InternalClientMessage}, data_state::{DataState, NodeIdentity}, server_message::ServerMessage, }; @@ -623,22 +623,22 @@ where Ok(()) } - InternalClientMessage::SubscribeLatestBlock(client_id) => { + InternalClientMessage::Request(client_id, ClientMessage::SubscribeLatestBlock) => { handle_client_message_subscribe_latest_block(client_id, client_thread_state).await; Ok(()) } - InternalClientMessage::SubscribeNodeIdentity(client_id) => { + InternalClientMessage::Request(client_id, ClientMessage::SubscribeNodeIdentity) => { handle_client_message_subscribe_node_identity(client_id, client_thread_state).await; Ok(()) } - InternalClientMessage::SubscribeVoters(client_id) => { + InternalClientMessage::Request(client_id, ClientMessage::SubscribeVoters) => { handle_client_message_subscribe_voters(client_id, client_thread_state).await; Ok(()) } - InternalClientMessage::RequestBlocksSnapshot(client_id) => { + InternalClientMessage::Request(client_id, ClientMessage::RequestBlocksSnapshot) => { handle_client_message_request_blocks_snapshot( client_id, data_state, @@ -648,7 +648,7 @@ where Ok(()) } - InternalClientMessage::RequestNodeIdentitySnapshot(client_id) => { + InternalClientMessage::Request(client_id, ClientMessage::RequestNodeIdentitySnapshot) => { handle_client_message_request_node_identity_snapshot( client_id, data_state, @@ -658,7 +658,7 @@ where Ok(()) } - InternalClientMessage::RequestHistogramSnapshot(client_id) => { + InternalClientMessage::Request(client_id, ClientMessage::RequestHistogramSnapshot) => { handle_client_message_request_histogram_snapshot( client_id, data_state, @@ -668,7 +668,7 @@ where Ok(()) } - InternalClientMessage::RequestVotersSnapshot(client_id) => { + InternalClientMessage::Request(client_id, ClientMessage::RequestVotersSnapshot) => { handle_client_message_request_voters_snapshot( client_id, data_state, @@ -1186,7 +1186,7 @@ pub mod tests { use super::{ClientThreadState, InternalClientMessageProcessingTask}; use crate::service::{ client_id::ClientId, - client_message::InternalClientMessage, + client_message::{ClientMessage, InternalClientMessage}, client_state::{ ProcessDistributeBlockDetailHandlingTask, ProcessDistributeNodeIdentityHandlingTask, ProcessDistributeVotersHandlingTask, @@ -1366,7 +1366,10 @@ pub mod tests { assert_eq!( internal_client_message_sender_1 - .send(InternalClientMessage::RequestVotersSnapshot(client_1_id)) + .send(InternalClientMessage::Request( + client_1_id, + ClientMessage::RequestVotersSnapshot + )) .await, Ok(()), ); @@ -1455,7 +1458,10 @@ pub mod tests { assert_eq!( internal_client_message_sender_1 - .send(InternalClientMessage::RequestBlocksSnapshot(client_1_id)) + .send(InternalClientMessage::Request( + client_1_id, + ClientMessage::RequestBlocksSnapshot + )) .await, Ok(()), ); @@ -1536,8 +1542,9 @@ pub mod tests { assert_eq!( internal_client_message_sender_1 - .send(InternalClientMessage::RequestNodeIdentitySnapshot( - client_1_id + .send(InternalClientMessage::Request( + client_1_id, + 
ClientMessage::RequestNodeIdentitySnapshot )) .await, Ok(()), ); @@ -1658,14 +1665,20 @@ assert_eq!( internal_client_message_sender_1 - .send(InternalClientMessage::SubscribeLatestBlock(client_1_id)) + .send(InternalClientMessage::Request( + client_1_id, + ClientMessage::SubscribeLatestBlock + )) .await, Ok(()), ); assert_eq!( internal_client_message_sender_1 - .send(InternalClientMessage::SubscribeLatestBlock(client_2_id)) + .send(InternalClientMessage::Request( + client_2_id, + ClientMessage::SubscribeLatestBlock + )) .await, Ok(()), ); @@ -1829,14 +1842,20 @@ assert_eq!( internal_client_message_sender_1 - .send(InternalClientMessage::SubscribeNodeIdentity(client_1_id)) + .send(InternalClientMessage::Request( + client_1_id, + ClientMessage::SubscribeNodeIdentity + )) .await, Ok(()), ); assert_eq!( internal_client_message_sender_1 - .send(InternalClientMessage::SubscribeNodeIdentity(client_2_id)) + .send(InternalClientMessage::Request( + client_2_id, + ClientMessage::SubscribeNodeIdentity + )) .await, Ok(()), ); @@ -1972,14 +1991,20 @@ assert_eq!( internal_client_message_sender_1 - .send(InternalClientMessage::SubscribeVoters(client_1_id)) + .send(InternalClientMessage::Request( + client_1_id, + ClientMessage::SubscribeVoters + )) .await, Ok(()), ); assert_eq!( internal_client_message_sender_1 - .send(InternalClientMessage::SubscribeVoters(client_2_id)) + .send(InternalClientMessage::Request( + client_2_id, + ClientMessage::SubscribeVoters + )) .await, Ok(()), ); From ac01ffe06e8347d15acb644831f39e633514476f Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Thu, 1 Aug 2024 14:49:52 -0600 Subject: [PATCH 58/72] Refactor `DataState` initialization to avoid mutation The `new` call for `DataState` takes a `stake_table` as part of its arguments. Currently, the `DataState` gets constructed with `Default::default()` arguments, and then the `stake_table` is replaced within it. This is unnecessary, as one of the next steps of initialization is to retrieve the `stake_table` from the `stake_table_url_base` URL. Instead, this should just defer the creation of the `DataState` until after the `stake_table` is retrieved.
Suggested by @jbearer in this discussion: https://github.com/EspressoSystems/espresso-sequencer/pull/1771#discussion_r1700515508 --- .../node_validator/v0/create_node_validator_api.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs b/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs index 50c5e1293..f134d5aef 100644 --- a/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs +++ b/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs @@ -280,13 +280,6 @@ pub async fn create_node_validator_processing( internal_client_message_receiver: Receiver>>, leaf_receiver: Receiver>, ) -> Result>, CreateNodeValidatorProcessingError> { - let mut data_state = DataState::new( - Default::default(), - Default::default(), - Default::default(), - Default::default(), - ); - let client_thread_state = ClientThreadState::>::new( Default::default(), Default::default(), @@ -301,7 +294,12 @@ .await .map_err(CreateNodeValidatorProcessingError::FailedToGetStakeTable)?; - data_state.replace_stake_table(stake_table); + let data_state = DataState::new( + Default::default(), + Default::default(), + stake_table, + Default::default(), + ); let data_state = Arc::new(RwLock::new(data_state)); let client_thread_state = Arc::new(RwLock::new(client_thread_state)); From ea3d37a4fed07ab35566b3e651f4bd4c664fc813 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Thu, 1 Aug 2024 15:08:51 -0600 Subject: [PATCH 59/72] Replace early return in `HotShotEventProcessingTask` with `continue` --- .../src/api/node_validator/v0/create_node_validator_api.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs b/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs index f134d5aef..336140739 100644 --- a/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs +++ b/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs @@ -160,7 +160,7 @@ let send_result = url_sender.send(public_api_url).await; if let Err(err) = send_result { tracing::info!("url sender closed: {}", err); - return; + continue; } } _ => { From 72d09f6809b309b1e5b4f3a66d4282603c35730e Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Thu, 1 Aug 2024 15:12:51 -0600 Subject: [PATCH 60/72] Add leaf chain sort to ensure ascending order @jbearer has noted that the `leaf_chain` within the `Decide` hotshot event returns the highest `Leaf` first: https://github.com/EspressoSystems/espresso-sequencer/pull/1771#discussion_r1700502767 The leaf sender expects the leaves to arrive in ascending order. To achieve this, the `leaf_chain` is sorted into ascending height order before its leaves are sent. --- .../src/api/node_validator/v0/create_node_validator_api.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs b/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs index 336140739..80190beb8 100644 --- a/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs +++ b/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs @@ -125,7 +125,9 @@ impl HotShotEventProcessingTask { match event { EventType::Decide { leaf_chain, ..
} => { - for leaf_info in leaf_chain.iter() { + let mut leaf_chain_cloned = leaf_chain.iter().cloned().collect::>(); + leaf_chain_cloned.sort_by_key(|a| a.leaf.height()); + for leaf_info in leaf_chain_cloned.iter() { let leaf = leaf_info.leaf.clone(); let send_result = leaf_sender.send(leaf).await; From 44325109e35ddeb238a53ebd84471f4ab2e7f573 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Thu, 1 Aug 2024 15:25:33 -0600 Subject: [PATCH 61/72] Fix bad node-validator API documentation As pointed out by @jbearer the documentation for the `node-validator` API was largely copied and then not modified to reflect the specific intentions and features of the `node-validator` endpoints: https://github.com/EspressoSystems/espresso-sequencer/pull/1771#discussion_r1700406550 https://github.com/EspressoSystems/espresso-sequencer/pull/1771#discussion_r1700410378 Modifies the documentation to reflect the purpose and intention behind the `node-validator` API. --- .../v0/create_node_validator_api.rs | 4 +-- .../api/node_validator/v0/node_validator.toml | 27 +++++++++---------- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs b/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs index 80190beb8..d6e99f34f 100644 --- a/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs +++ b/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs @@ -125,9 +125,7 @@ impl HotShotEventProcessingTask { match event { EventType::Decide { leaf_chain, .. } => { - let mut leaf_chain_cloned = leaf_chain.iter().cloned().collect::>(); - leaf_chain_cloned.sort_by_key(|a| a.leaf.height()); - for leaf_info in leaf_chain_cloned.iter() { + for leaf_info in leaf_chain.iter().rev() { let leaf = leaf_info.leaf.clone(); let send_result = leaf_sender.send(leaf).await; diff --git a/node-metrics/src/api/node_validator/v0/node_validator.toml b/node-metrics/src/api/node_validator/v0/node_validator.toml index eaa99f4ea..6bf0383bd 100644 --- a/node-metrics/src/api/node_validator/v0/node_validator.toml +++ b/node-metrics/src/api/node_validator/v0/node_validator.toml @@ -14,27 +14,26 @@ FORMAT_VERSION = "0.1.0" NAME = "node-validator" DESCRIPTION = """ -HotShot chain state +The node-validator API provides an endpoint that allows for the near real-time +streaming of the HotShot blockchain, and the sequencer nodes that are connected +and contributing to the HotShot blockchain. -The availability API provides an objective view of the HotShot blockchain. It provides access only -to normative data: that is, data which is agreed upon by all honest consensus nodes and which is -immutable. This means access to core consensus data structures including leaves, blocks, and -headers, where each query is pure and idempotent. This also means that it is possible for a client -to verify all of the information provided by this API, by running a HotShot light client and -downloading the appropriate evidence with each query. +The data that is provided by this API can be used to construct a dashboard to +provide near real-time updates / views of the current blockchain state and +nodes. -This API does not provide any queries which represent only the _current_ state of the chain or may -change over time, and it does not provide information for which there is not (yet) agreement of a -supermajority of consensus nodes. For information about the current dynamic state of consensus and -uncommitted state, try the `status` API. 
For information about the chain which is tabulated by this -specific node and not subject to full consensus agreement, try the `node` API. +Additionally, this gives participating nodes the ability to be identified and +represented for public view, and transparency. """ [route.details] PATH = ["details"] METHOD = "SOCKET" DOC = """ -Subscribe to a stream of blocks in the order they are sequenced, starting at `:height`. +The details endpoint allows a client to opt-in to specific stream updates in a +single connection. All information that is provided by this endpoint is opt-in. +It will only provide what is requested across the WebSocket. -Opens a WebSockets connection and sends a stream of the same data type returned by `block/:height`. +Opens a WebSocket connection that will send events and responses to specifically +requested data. """ From eebb3ce881abf70719248c903d8ebb7e2ee8b47b Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Thu, 1 Aug 2024 15:31:41 -0600 Subject: [PATCH 62/72] Remove unnecessary drops As pointed out by @jbearer, the `drop` calls at the end of the block are not needed: https://github.com/EspressoSystems/espresso-sequencer/pull/1771#discussion_r1700434363 --- node-metrics/src/lib.rs | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/node-metrics/src/lib.rs b/node-metrics/src/lib.rs index 98735c310..65147fef5 100644 --- a/node-metrics/src/lib.rs +++ b/node-metrics/src/lib.rs @@ -245,7 +245,7 @@ pub async fn run_standalone_service(options: Options) { let (leaf_sender, leaf_receiver) = mpsc::channel(10); - let process_consume_leaves = ProcessProduceLeafStreamTask::new( + let _process_consume_leaves = ProcessProduceLeafStreamTask::new( HotshotQueryServiceLeafStreamRetriever::new(options.leaf_stream_base_url().clone()), leaf_sender, ); @@ -267,7 +267,7 @@ pub async fn run_standalone_service(options: Options) { } }; - let cdn_tasks = if let Some(cdn_broker_url_string) = options.cdn_marshal_endpoint() { + let _cdn_tasks = if let Some(cdn_broker_url_string) = options.cdn_marshal_endpoint() { let (public_key, private_key) = PubKey::generated_from_seed_indexed([1; 32], 0); let cdn_network_result = PushCdnNetwork::::new( cdn_broker_url_string.to_string(), @@ -305,12 +305,4 @@ pub async fn run_standalone_service(options: Options) { }); app_serve_handle.await; - - if let Some((broadcast_roll_call_task, cdn_receive_message_task)) = cdn_tasks { - drop(broadcast_roll_call_task); - drop(cdn_receive_message_task); - } - - drop(node_validator_task_state); - drop(process_consume_leaves); } From b46ff5700205b26addec340953364b4b18ecb37d Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Thu, 1 Aug 2024 15:34:51 -0600 Subject: [PATCH 63/72] Refactor define_api call to be more succinct Proposed by @jbearer in comment: https://github.com/EspressoSystems/espresso-sequencer/pull/1771#discussion_r1700427414 --- node-metrics/src/lib.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/node-metrics/src/lib.rs b/node-metrics/src/lib.rs index 65147fef5..bc5f498f8 100644 --- a/node-metrics/src/lib.rs +++ b/node-metrics/src/lib.rs @@ -228,13 +228,8 @@ pub async fn run_standalone_service(options: Options) { }; let mut app: App<_, api::node_validator::v0::Error> = App::with_state(state); - let node_validator_api_result = api::node_validator::v0::define_api(); - let node_validator_api = match node_validator_api_result { - Ok(node_validator_api) => node_validator_api, - Err(err) => { - panic!("error defining node validator api: {:?}", err); 
- } - }; + let node_validator_api = + api::node_validator::v0::define_api().expect("error defining node validator api"); match app.register_module("node-validator", node_validator_api) { Ok(_) => {} From 762c0a0828b101386f80142abe6b196f8f638d33 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Thu, 1 Aug 2024 15:51:47 -0600 Subject: [PATCH 64/72] Add comments describing some `NodeIdentity` fields It is not clear what valid values for `network_type` and `node_type` are or should be. In order to add clarity, comments have been added to these two fields to provide some ideas for what could be valid values for them. Change suggested by @jbearer: https://github.com/EspressoSystems/espresso-sequencer/pull/1771#discussion_r1700460906 --- .../src/service/data_state/node_identity.rs | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/node-metrics/src/service/data_state/node_identity.rs b/node-metrics/src/service/data_state/node_identity.rs index 5ba8d4c49..d7b0c4ed8 100644 --- a/node-metrics/src/service/data_state/node_identity.rs +++ b/node-metrics/src/service/data_state/node_identity.rs @@ -16,7 +16,36 @@ pub struct NodeIdentity { pub(crate) company_website: Option, pub(crate) location: Option, pub(crate) operating_system: Option, + + /// node_type is meant to reflect the type of the node that is being + /// run. The simplest representation of this value is the specific + /// binary program that is running for the node. In the case of the + /// Espresso sequencer, this is expected to be the value: + /// "espresso-sequencer <version>". + /// + /// Other implementations may use their own values instead. pub(crate) node_type: Option, + + /// network_type is meant to represent the type of network that the node is + /// connected to. The sample specification has the following values + /// suggested: + /// - residential + /// - hosting + /// + /// It would be preferable to have a preset list of values for this field, + /// but for flexibility it is set to be a generic String.
+ /// Proposed values: + /// - Residential + /// - AWS + /// - Azure + /// - GCP + /// + /// These could also potentially include the availability zone for the + /// hosted networks: + /// - AWS (us-east-1) + /// + /// This could potentially even be: + /// - AWS (us-east-1a) pub(crate) network_type: Option, } From 9708b9e3f01b9ddd97b70a18bfd132d134d03574 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Thu, 1 Aug 2024 16:24:57 -0600 Subject: [PATCH 65/72] Refactor ExternalEventHandler based on feedback Add `create_roll_call_response` to handle message serialization Suggested by @jbearer: https://github.com/EspressoSystems/espresso-sequencer/pull/1771#discussion_r1700494615 Replace dummy public key with actual public key Suggested by @jbearer: https://github.com/EspressoSystems/espresso-sequencer/pull/1771#discussion_r1700489346 Replace `Vec` of `JoinHandle` and `Drop` code with `TaskList` Suggested by @jbearer: https://github.com/EspressoSystems/espresso-sequencer/pull/1771#discussion_r1700493073 --- sequencer/src/context.rs | 2 +- sequencer/src/external_event_handler.rs | 99 +++++++++++-------------- 2 files changed, 46 insertions(+), 55 deletions(-) diff --git a/sequencer/src/context.rs b/sequencer/src/context.rs index a603fa569..9b2e18c55 100644 --- a/sequencer/src/context.rs +++ b/sequencer/src/context.rs @@ -164,7 +164,7 @@ impl, P: SequencerPersistence, Ver: StaticVersionTyp let roll_call_info = external_event_handler::RollCallInfo { public_api_url }; // Create the external event handler - let external_event_handler = ExternalEventHandler::new(network, roll_call_info) + let external_event_handler = ExternalEventHandler::new(network, roll_call_info, pub_key) .with_context(|| "Failed to create external event handler")?; Ok(Self::new( diff --git a/sequencer/src/external_event_handler.rs b/sequencer/src/external_event_handler.rs index 43e2bb137..5fa177da0 100644 --- a/sequencer/src/external_event_handler.rs +++ b/sequencer/src/external_event_handler.rs @@ -1,16 +1,13 @@ //!
Should probably rename this to "external" or something +use crate::context::TaskList; use anyhow::{Context, Result}; use async_compatibility_layer::channel::{Receiver, Sender}; -use async_std::task::{self, JoinHandle}; use espresso_types::{PubKey, SeqTypes}; -use hotshot::types::{BLSPubKey, Message, SignatureKey}; +use hotshot::types::{BLSPubKey, Message}; use hotshot_types::{ message::{MessageKind, VersionedMessage}, - traits::{ - network::{BroadcastDelay, ConnectedNetwork, Topic}, - node_implementation::NodeType, - }, + traits::network::{BroadcastDelay, ConnectedNetwork, Topic}, }; use serde::{Deserialize, Serialize}; use std::sync::Arc; @@ -40,8 +37,11 @@ pub struct ExternalEventHandler { // The `RollCallInfo` of the node (used in the roll call response) pub roll_call_info: RollCallInfo, + // The public key of the node + pub public_key: BLSPubKey, + // The tasks that are running - pub tasks: Vec>, + pub _tasks: TaskList, // The outbound message queue pub outbound_message_sender: Sender, @@ -59,36 +59,25 @@ impl ExternalEventHandler { pub fn new>( network: Arc, roll_call_info: RollCallInfo, + public_key: BLSPubKey, ) -> Result { // Create the outbound message queue let (outbound_message_sender, outbound_message_receiver) = async_compatibility_layer::channel::bounded(10); + let mut tasks: TaskList = Default::default(); + // Spawn the outbound message handling loop - let outbound_message_loop = async_std::task::spawn(Self::outbound_message_loop( - outbound_message_receiver, - network, - )); + tasks.spawn( + "ExternalEventHandler (RollCall)", + Self::outbound_message_loop(outbound_message_receiver, network), + ); // We just started, so queue an outbound RollCall message (if we have a public API URL) if roll_call_info.public_api_url.is_some() { - let roll_call_message = ExternalMessage::RollCallResponse(roll_call_info.clone()); - let roll_call_message_bytes = bincode::serialize(&roll_call_message) - .with_context(|| "Failed to serialize roll call message for initial broadcast")?; - - let message = Message:: { - sender: ::SignatureKey::generated_from_seed_indexed( - [0; 32], 0, - ) - .0, - kind: MessageKind::External(roll_call_message_bytes), - }; - let roll_call_message_bytes = - as VersionedMessage>::serialize(&message, &None) - .with_context(|| { - "Failed to serialize roll call message for initial broadcast" - })?; + Self::create_roll_call_response(&public_key, &roll_call_info) + .with_context(|| "Failed to create roll call response for initial broadcast")?; outbound_message_sender .try_send(OutboundMessage::Broadcast(roll_call_message_bytes)) @@ -97,7 +86,8 @@ impl ExternalEventHandler { Ok(Self { roll_call_info, - tasks: vec![outbound_message_loop], + public_key, + _tasks: tasks, outbound_message_sender, }) } @@ -119,24 +109,11 @@ impl ExternalEventHandler { return Ok(()); } - // If it's a roll call request, send our information (if we have a public API URL) - let response = ExternalMessage::RollCallResponse(self.roll_call_info.clone()); - - // Serialize the response - let response_bytes = bincode::serialize(&response) - .with_context(|| "Failed to serialize roll call response")?; - - let message = Message:: { - sender: ::SignatureKey::generated_from_seed_indexed( - [0; 32], 0, - ) - .0, - kind: MessageKind::::External(response_bytes), - }; - let response_bytes = - as VersionedMessage>::serialize(&message, &None) - .with_context(|| "Failed to serialize roll call response")?; + Self::create_roll_call_response(&self.public_key, &self.roll_call_info) + .with_context(|| { + "Failed to 
serialize roll call response for RollCallRequest" + })?; // Send the response self.outbound_message_sender @@ -151,6 +128,29 @@ impl ExternalEventHandler { Ok(()) } + /// Creates a roll call response message + fn create_roll_call_response( + public_key: &BLSPubKey, + roll_call_info: &RollCallInfo, + ) -> Result> { + let response = ExternalMessage::RollCallResponse(roll_call_info.clone()); + + // Serialize the response + let response_bytes = bincode::serialize(&response) + .with_context(|| "Failed to serialize roll call response")?; + + let message = Message:: { + sender: *public_key, + kind: MessageKind::::External(response_bytes), + }; + + let response_bytes = + as VersionedMessage>::serialize(&message, &None) + .with_context(|| "Failed to serialize roll call response")?; + + Ok(response_bytes) + } + /// The main loop for sending outbound messages. async fn outbound_message_loop>( mut receiver: Receiver, @@ -179,12 +179,3 @@ impl ExternalEventHandler { } } } - -impl Drop for ExternalEventHandler { - fn drop(&mut self) { - // Cancel all tasks - for task in self.tasks.drain(..) { - task::block_on(task.cancel()); - } - } -} From ba8048c954ece20a0f907c0a8210046bd0702f43 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Thu, 1 Aug 2024 16:54:49 -0600 Subject: [PATCH 66/72] Refactor separate node identity options into struct Based on feedback provided by @jbearer: https://github.com/EspressoSystems/espresso-sequencer/pull/1771#discussion_r1700498080 https://github.com/EspressoSystems/espresso-sequencer/pull/1771#discussion_r1700465232 Instead of querying all of the environment variables separately, and dynamically after the program has launched, we can take advantage of clap's ability to automatically populate the desired data points from either command line arguments or environment variables. As an added benefit the fields can be grouped together into a single `Identity` structure. --- sequencer/src/lib.rs | 33 ++++++++++++++------- sequencer/src/main.rs | 2 ++ sequencer/src/options.rs | 64 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 11 deletions(-) diff --git a/sequencer/src/lib.rs b/sequencer/src/lib.rs index 0d041ca5e..d5fdfc331 100644 --- a/sequencer/src/lib.rs +++ b/sequencer/src/lib.rs @@ -23,6 +23,7 @@ use hotshot_example_types::auction_results_provider_types::TestAuctionResultsPro // Should move `STAKE_TABLE_CAPACITY` in the sequencer repo when we have variate stake table support use libp2p::Multiaddr; use network::libp2p::split_off_peer_id; +use options::Identity; use state_signature::static_stake_table_commitment; use url::Url; pub mod persistence; @@ -122,6 +123,7 @@ pub struct L1Params { pub events_max_block_range: u64, } +#[allow(clippy::too_many_arguments)] pub async fn init_node( genesis: Genesis, network_params: NetworkParams, @@ -130,6 +132,7 @@ pub async fn init_node( l1_params: L1Params, bind_version: Ver, is_da: bool, + identity: Identity, ) -> anyhow::Result> { // Expose git information via status API. 
metrics @@ -158,14 +161,16 @@ pub async fn init_node( ], ) .create(vec![ - std::env::var("ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME").unwrap_or("".into()), - std::env::var("ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS").unwrap_or("".into()), - std::env::var("ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME").unwrap_or("".into()), - std::env::var("ESPRESSO_SEQUENCER_IDENTITY_COMPANY_WEBSITE").unwrap_or("".into()), - std::env::var("ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM").unwrap_or("".into()), - std::env::var("ESPRESSO_SEQUENCER_IDENTITY_NODE_TYPE") - .unwrap_or(format!("espresso-sequencer {}", Ver::VERSION)), - std::env::var("ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE").unwrap_or("".into()), + identity.node_name.unwrap_or("".into()), + identity.wallet_address.unwrap_or("".into()), + identity.company_name.unwrap_or("".into()), + identity + .company_website + .map(|u| u.into()) + .unwrap_or("".into()), + identity.operating_system.unwrap_or("".into()), + identity.node_type.unwrap_or("".into()), + identity.network_type.unwrap_or("".into()), ]); // Expose Node Identity Location via the status/metrics API @@ -175,9 +180,15 @@ pub async fn init_node( vec!["country".into(), "latitude".into(), "longitude".into()], ) .create(vec![ - std::env::var("ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE").unwrap_or("".into()), - std::env::var("ESPRESSO_SEQUENCER_IDENTITY_LATITUDE").unwrap_or("".into()), - std::env::var("ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE").unwrap_or("".into()), + identity.country_code.unwrap_or("".into()), + identity + .latitude + .map(|l| l.to_string()) + .unwrap_or("".into()), + identity + .longitude + .map(|l| l.to_string()) + .unwrap_or("".into()), ]); // Stick our public key in `metrics` so it is easily accessible via the status API. diff --git a/sequencer/src/main.rs b/sequencer/src/main.rs index f93395c96..070d0e459 100644 --- a/sequencer/src/main.rs +++ b/sequencer/src/main.rs @@ -129,6 +129,7 @@ where l1_params, bind_version, opt.is_da, + opt.identity, ) .await .unwrap() @@ -148,6 +149,7 @@ where l1_params, bind_version, opt.is_da, + opt.identity, ) .await? } diff --git a/sequencer/src/options.rs b/sequencer/src/options.rs index 021797f79..6137675db 100644 --- a/sequencer/src/options.rs +++ b/sequencer/src/options.rs @@ -210,6 +210,9 @@ pub struct Options { #[clap(flatten)] pub logging: logging::Config, + + #[clap(skip)] + pub identity: Identity, } impl Options { @@ -240,6 +243,67 @@ impl Options { } } +/// Identity represents identifying information concerning the sequencer node. +/// This information is used to populate relevant information in the metrics +/// endpoint. This information will also potentially be scraped and displayed +/// in a public facing dashboard. 
+#[derive(Parser, Clone, Derivative)] +#[derivative(Debug(bound = ""))] +pub struct Identity { + #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_COUNTRY_CODE")] + pub country_code: Option, + #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_LATITUDE")] + pub latitude: Option, + #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_LONGITUDE")] + pub longitude: Option, + + #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME")] + pub node_name: Option, + + #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS")] + pub wallet_address: Option, + #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME")] + pub company_name: Option, + #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_COMPANY_WEBSITE")] + pub company_website: Option, + #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM")] + pub operating_system: Option, + #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_NODE_TYPE")] + pub node_type: Option, + #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE")] + pub network_type: Option, +} + +impl Default for Identity { + fn default() -> Self { + let Identity { + country_code, + latitude, + longitude, + node_name, + wallet_address, + company_name, + company_website, + operating_system, + node_type, + network_type, + } = Self::parse(); + + Self { + country_code, + latitude, + longitude, + node_name, + wallet_address, + company_name, + company_website, + operating_system: operating_system.or(Some(std::env::consts::OS.to_string())), + node_type: node_type.or(Some(format!("sequencer-{}", env!("CARGO_PKG_VERSION")))), + network_type, + } + } +} + // The Debug implementation for Url is noisy, we just want to see the URL fn fmt_urls(v: &[Url], fmt: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { write!( From c0206d153d4d7ed2bbf676954ca6758123e24085 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Fri, 2 Aug 2024 07:50:31 -0600 Subject: [PATCH 67/72] Refactor replace async task returns with panics Based on feedback discussion with @jbearer: https://github.com/EspressoSystems/espresso-sequencer/pull/1771#discussion_r1701007360 https://github.com/EspressoSystems/espresso-sequencer/pull/1771#discussion_r1701009775 In the cases where we are no longer able to make meaningful progress, and in an effort to be better about not failing silently and then having our data slowly stagnate over time, these early task returns have been replaced with panics for improved failure indicators. 
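As a rough sketch of the resulting pattern (simplified channel and item types; the real tasks carry richer state), each loop now fails loudly once its sink is closed:

```rust
use futures::{channel::mpsc, SinkExt, StreamExt};

// Simplified stand-in for the affected tasks: forward items until the source
// ends. If the sink is closed, no meaningful progress is possible, so panic
// instead of returning silently and letting downstream data stagnate.
async fn forward_loop(mut source: mpsc::Receiver<u64>, mut sink: mpsc::Sender<u64>) {
    while let Some(item) = source.next().await {
        if let Err(err) = sink.send(item).await {
            // Previously: a tracing::info! log followed by a bare `return`.
            panic!("forward_loop: sink closed, downstream state would stagnate: {}", err);
        }
    }
}
```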
--- .../v0/create_node_validator_api.rs | 12 ++--- node-metrics/src/api/node_validator/v0/mod.rs | 7 ++- node-metrics/src/service/client_state/mod.rs | 10 ++--- node-metrics/src/service/data_state/mod.rs | 45 ++++++++++++++----- 4 files changed, 49 insertions(+), 25 deletions(-) diff --git a/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs b/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs index d6e99f34f..210dc805b 100644 --- a/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs +++ b/node-metrics/src/api/node_validator/v0/create_node_validator_api.rs @@ -130,8 +130,8 @@ impl HotShotEventProcessingTask { let send_result = leaf_sender.send(leaf).await; if let Err(err) = send_result { - tracing::info!("leaf sender closed: {}", err); - return; + tracing::error!("leaf sender closed: {}", err); + panic!("HotShotEventProcessingTask leaf sender is closed, unrecoverable, the block state will stagnate."); } } } @@ -159,8 +159,8 @@ impl HotShotEventProcessingTask { // Send the the discovered public url to the sink let send_result = url_sender.send(public_api_url).await; if let Err(err) = send_result { - tracing::info!("url sender closed: {}", err); - continue; + tracing::error!("url sender closed: {}", err); + panic!("HotShotEventProcessingTask url sender is closed, unrecoverable, the node state will stagnate."); } } _ => { @@ -233,7 +233,7 @@ impl ProcessExternalMessageHandlingTask { let external_message = match external_message_result { Some(external_message) => external_message, None => { - tracing::info!("external message receiver closed"); + tracing::error!("external message receiver closed"); break; } }; @@ -244,7 +244,7 @@ impl ProcessExternalMessageHandlingTask { let send_result = url_sender.send(public_api_url).await; if let Err(err) = send_result { - tracing::info!("url sender closed: {}", err); + tracing::error!("url sender closed: {}", err); break; } } diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs index 94cdecfe3..2f3cb5e7d 100644 --- a/node-metrics/src/api/node_validator/v0/mod.rs +++ b/node-metrics/src/api/node_validator/v0/mod.rs @@ -889,8 +889,11 @@ impl ProcessNodeIdentityUrlStreamTask { let send_result = node_identity_sender.send(node_identity).await; if let Err(err) = send_result { - tracing::info!("node identity sender closed: {}", err); - return; + tracing::error!("node identity sender closed: {}", err); + + // We will be unable to provide any additional node identity + // updates. This is considered a critical error. 
+ panic!("ProcessNodeIdentityUrlStreamTask node_identity_sender closed, future node identity information will stagnate: {}", err); } } } diff --git a/node-metrics/src/service/client_state/mod.rs b/node-metrics/src/service/client_state/mod.rs index feb88bc98..ab0767388 100644 --- a/node-metrics/src/service/client_state/mod.rs +++ b/node-metrics/src/service/client_state/mod.rs @@ -942,8 +942,8 @@ impl InternalClientMessageProcessingTask { let message = if let Some(message) = message_result { message } else { - tracing::info!("internal client message handler closed."); - return; + tracing::error!("internal client message handler closed."); + panic!("InternalClientMessageProcessingTask stream closed, unable to process new requests from clients."); }; if let Err(err) = @@ -1020,7 +1020,7 @@ impl ProcessDistributeBlockDetailHandlingTask { let block_detail = if let Some(block_detail) = block_detail_result { block_detail } else { - tracing::info!( + tracing::error!( "block detail stream closed. shutting down client handling stream.", ); return; @@ -1091,7 +1091,7 @@ impl ProcessDistributeNodeIdentityHandlingTask { let node_identity = if let Some(node_identity) = node_identity_result { node_identity } else { - tracing::info!( + tracing::error!( "node identity stream closed. shutting down client handling stream.", ); return; @@ -1161,7 +1161,7 @@ impl ProcessDistributeVotersHandlingTask { let voters = if let Some(voters) = voters_result { voters } else { - tracing::info!("voters stream closed. shutting down client handling stream.",); + tracing::error!("voters stream closed. shutting down client handling stream.",); return; }; diff --git a/node-metrics/src/service/data_state/mod.rs b/node-metrics/src/service/data_state/mod.rs index 7d7e549ac..e04155b30 100644 --- a/node-metrics/src/service/data_state/mod.rs +++ b/node-metrics/src/service/data_state/mod.rs @@ -181,15 +181,19 @@ pub fn create_block_detail_from_leaf(leaf: &Leaf) -> BlockDetail) -> std::fmt::Result { match self { - ProcessLeafError::SendError(err) => { + ProcessLeafError::BlockSendError(err) => { write!(f, "error sending block detail to sender: {}", err) } + ProcessLeafError::VotersSendError(err) => { + write!(f, "error sending voters to sender: {}", err) + } } } } @@ -197,7 +201,8 @@ impl std::fmt::Display for ProcessLeafError { impl std::error::Error for ProcessLeafError { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { match self { - ProcessLeafError::SendError(err) => Some(err), + ProcessLeafError::BlockSendError(err) => Some(err), + ProcessLeafError::VotersSendError(err) => Some(err), } } } @@ -293,12 +298,12 @@ where if let Err(err) = block_sender.send(block_detail_copy).await { // We have an error that prevents us from continuing - return Err(ProcessLeafError::SendError(err)); + return Err(ProcessLeafError::BlockSendError(err)); } if let Err(err) = voters_sender.send(voters_bitvec).await { // We have an error that prevents us from continuing - return Err(ProcessLeafError::SendError(err)); + return Err(ProcessLeafError::VotersSendError(err)); } Ok(()) @@ -360,7 +365,7 @@ impl ProcessLeafStreamTask { leaf } else { // We have reached the end of the stream - tracing::info!("process leaf stream: end of stream reached for leaf stream."); + tracing::error!("process leaf stream: end of stream reached for leaf stream."); return; }; @@ -373,8 +378,19 @@ impl ProcessLeafStreamTask { .await { // We have an error that prevents us from continuing - tracing::info!("process leaf stream: error processing leaf: {}", err); - break; 
+ tracing::error!("process leaf stream: error processing leaf: {}", err); + + // At the moment, all underlying errors are due to `SendError` + // which will ultimately mean that further processing attempts + // will fail, and be fruitless. + match err { + ProcessLeafError::BlockSendError(_) => { + panic!("ProcessLeafStreamTask: process_incoming_leaf failed, underlying sink is closed, blocks will stagnate: {}", err) + } + ProcessLeafError::VotersSendError(_) => { + panic!("ProcessLeafStreamTask: process_incoming_leaf failed, underlying sink is closed, voters will stagnate: {}", err) + } + } } } } @@ -509,11 +525,16 @@ impl ProcessNodeIdentityStreamTask { .await { // We have an error that prevents us from continuing - tracing::info!( + tracing::error!( "process node identity stream: error processing node identity: {}", err ); - break; + + // The only underlying class of errors that can be returned from + // `process_incoming_node_identity` are due to `SendError` which + // will ultimately mean that further processing attempts will fail + // and be fruitless. + panic!("ProcessNodeIdentityStreamTask: process_incoming_node_identity failed, underlying sink is closed, node identities will stagnate: {}", err); } } } @@ -558,11 +579,11 @@ mod tests { assert!(receive_result.is_err()); let err = receive_result.unwrap_err(); - let process_leaf_err = super::ProcessLeafError::SendError(err); + let process_leaf_err = super::ProcessLeafError::BlockSendError(err); assert_eq!( format!("{:?}", process_leaf_err), - "SendError(SendError { kind: Disconnected })" + "BlockSendError(SendError { kind: Disconnected })" ); } From e2a2180bd9e55278ee68b610aee33a19ae20f951 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Fri, 2 Aug 2024 09:36:16 -0600 Subject: [PATCH 68/72] Fix Identity value population From discussion with @jbearer https://github.com/EspressoSystems/espresso-sequencer/pull/1771#discussion_r1701015549 Based on the conversation with Jeb, the default value can be utilized to populate the operating system value from the environment directly. While implementing this fix, it was also discovered that the previous way of populating the Identity using `Default::default` actually prevented the program from running at all due to bad initialization. I haven't determined the exact reason, but by switching back to default_value population, we can revert to using the `flatten` option, and the issue seems to be fixed as a result.
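A reduced sketch of the `default_value` approach (only the one field is shown; the real struct has several more, and the clap version in use is assumed to accept expression defaults, as the diff below relies on):

```rust
use clap::Parser;

#[derive(Parser, Debug)]
pub struct Identity {
    // The command-line flag and the environment variable still take
    // precedence; the build target's OS is only the fallback default.
    #[clap(
        long,
        env = "ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM",
        default_value = std::env::consts::OS
    )]
    pub operating_system: Option<String>,
}

fn main() {
    // With a default present, the field is always populated.
    let identity = Identity::parse();
    println!("operating_system: {:?}", identity.operating_system);
}
```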
--- sequencer/src/options.rs | 38 +++++++------------------------------- 1 file changed, 7 insertions(+), 31 deletions(-) diff --git a/sequencer/src/options.rs b/sequencer/src/options.rs index 6137675db..e6a34c123 100644 --- a/sequencer/src/options.rs +++ b/sequencer/src/options.rs @@ -211,7 +211,7 @@ pub struct Options { #[clap(flatten)] pub logging: logging::Config, - #[clap(skip)] + #[clap(flatten)] pub identity: Identity, } @@ -266,42 +266,18 @@ pub struct Identity { pub company_name: Option, #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_COMPANY_WEBSITE")] pub company_website: Option, - #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM")] + #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_OPERATING_SYSTEM", default_value = std::env::consts::OS)] pub operating_system: Option, - #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_NODE_TYPE")] + #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_NODE_TYPE", default_value = get_default_node_type())] pub node_type: Option, #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_NETWORK_TYPE")] pub network_type: Option, } -impl Default for Identity { - fn default() -> Self { - let Identity { - country_code, - latitude, - longitude, - node_name, - wallet_address, - company_name, - company_website, - operating_system, - node_type, - network_type, - } = Self::parse(); - - Self { - country_code, - latitude, - longitude, - node_name, - wallet_address, - company_name, - company_website, - operating_system: operating_system.or(Some(std::env::consts::OS.to_string())), - node_type: node_type.or(Some(format!("sequencer-{}", env!("CARGO_PKG_VERSION")))), - network_type, - } - } +/// get_default_node_type returns the current public facing binary name and +/// version of this program. +fn get_default_node_type() -> String { + format!("espresso-sequencer {}", env!("CARGO_PKG_VERSION")) } // The Debug implementation for Url is noisy, we just want to see the URL From 0ef688df10e905f97c4f0ecb6c2d54b2d7014f7d Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Fri, 2 Aug 2024 09:56:24 -0600 Subject: [PATCH 69/72] Remove wallet_address from NodeIdentity Discussion with @jbearer: https://github.com/EspressoSystems/espresso-sequencer/pull/1771#discussion_r1701001580 Right now we don't have a meaningful definition of what an address for a Sequencer is. As a result it makes more sense to omit the wallet address field rather than to keep it in an effort to create a forward compatible definition. This commit removes the definition, population, and representation of the wallet address from both the sequencer, and from the node-validator api. This will require a change in the Node Validator UI, as it expects a wallet_address to be present. 
--- node-metrics/src/api/node_validator/v0/mod.rs | 27 ++----------------- node-metrics/src/service/client_state/mod.rs | 3 --- node-metrics/src/service/data_state/mod.rs | 4 +-- .../src/service/data_state/node_identity.rs | 18 ------------- sequencer/src/lib.rs | 2 -- sequencer/src/options.rs | 2 -- 6 files changed, 3 insertions(+), 53 deletions(-) diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs index 2f3cb5e7d..0cae3c51c 100644 --- a/node-metrics/src/api/node_validator/v0/mod.rs +++ b/node-metrics/src/api/node_validator/v0/mod.rs @@ -5,7 +5,7 @@ use crate::service::client_message::{ClientMessage, InternalClientMessage}; use crate::service::data_state::{LocationDetails, NodeIdentity}; use crate::service::server_message::ServerMessage; use async_std::task::JoinHandle; -use espresso_types::{FeeAccount, SeqTypes}; +use espresso_types::SeqTypes; use futures::channel::mpsc::SendError; use futures::future::Either; use futures::{ @@ -629,21 +629,6 @@ fn populate_node_identity_general_from_scrape( .labels .get("operating_system") .map(|s| s.into()); - // Wallet Address - let parsed_wallet_address_result = node_identity_general_sample - .labels - .get("wallet") - .map(FeeAccount::from_str); - - match parsed_wallet_address_result { - Some(Ok(parsed_wallet_address)) => { - node_identity.wallet_address = Some(parsed_wallet_address); - } - Some(Err(err)) => { - tracing::info!("parsing wallet address failed: {}", err); - } - None => {} - } } /// [populate_node_location_from_scrape] populates the location information of a @@ -911,11 +896,7 @@ impl Drop for ProcessNodeIdentityUrlStreamTask { #[cfg(test)] mod tests { - use espresso_types::FeeAccount; - use std::{ - io::{BufRead, BufReader}, - str::FromStr, - }; + use std::io::{BufRead, BufReader}; fn example_prometheus_output() -> &'static str { include_str!("example_prometheus_metrics_output.txt") @@ -1037,10 +1018,6 @@ mod tests { node_identity.operating_system(), &Some("Linux 5.15.153.1".to_string()) ); - assert_eq!( - node_identity.wallet_address(), - &Some(FeeAccount::from_str("0x0000000000000000000000000000000000000000").unwrap()) - ); assert!(node_identity.location().is_some()); let node_identity_location = node_identity.location().unwrap(); diff --git a/node-metrics/src/service/client_state/mod.rs b/node-metrics/src/service/client_state/mod.rs index ab0767388..99c7e33ca 100644 --- a/node-metrics/src/service/client_state/mod.rs +++ b/node-metrics/src/service/client_state/mod.rs @@ -1223,7 +1223,6 @@ pub mod tests { NodeIdentity::new( pub_key, Some("a".to_string()), - Some(Default::default()), Some("http://localhost/".parse().unwrap()), Some("company".to_string()), Some("https://example.com/".parse().unwrap()), @@ -1242,7 +1241,6 @@ pub mod tests { NodeIdentity::new( pub_key, Some("b".to_string()), - Some(Default::default()), Some("http://localhost/".parse().unwrap()), Some("company".to_string()), Some("https://example.com/".parse().unwrap()), @@ -1261,7 +1259,6 @@ pub mod tests { NodeIdentity::new( pub_key, Some("b".to_string()), - Some(Default::default()), Some("http://localhost/".parse().unwrap()), Some("company".to_string()), Some("https://example.com/".parse().unwrap()), diff --git a/node-metrics/src/service/data_state/mod.rs b/node-metrics/src/service/data_state/mod.rs index e04155b30..6c7b289f8 100644 --- a/node-metrics/src/service/data_state/mod.rs +++ b/node-metrics/src/service/data_state/mod.rs @@ -559,8 +559,7 @@ mod tests { }; use async_std::{prelude::FutureExt, sync::RwLock}; 
use espresso_types::{ - v0_3::ChainConfig, BlockMerkleTree, FeeAccount, FeeMerkleTree, Leaf, NodeState, - ValidatedState, + v0_3::ChainConfig, BlockMerkleTree, FeeMerkleTree, Leaf, NodeState, ValidatedState, }; use futures::{channel::mpsc, SinkExt, StreamExt}; use hotshot_types::{signature_key::BLSPubKey, traits::signature_key::SignatureKey}; @@ -723,7 +722,6 @@ mod tests { let node_identity_1 = NodeIdentity::new( public_key_1, Some("name".to_string()), - Some(FeeAccount::default()), Some(Url::parse("https://example.com/").unwrap()), Some("company".to_string()), Some(Url::parse("https://example.com/").unwrap()), diff --git a/node-metrics/src/service/data_state/node_identity.rs b/node-metrics/src/service/data_state/node_identity.rs index d7b0c4ed8..8396a8134 100644 --- a/node-metrics/src/service/data_state/node_identity.rs +++ b/node-metrics/src/service/data_state/node_identity.rs @@ -1,5 +1,4 @@ use super::LocationDetails; -use espresso_types::FeeAccount; use hotshot_types::signature_key::BLSPubKey; use serde::{Deserialize, Serialize}; use surf_disco::Url; @@ -10,7 +9,6 @@ use surf_disco::Url; pub struct NodeIdentity { pub(crate) public_key: BLSPubKey, pub(crate) name: Option, - pub(crate) wallet_address: Option, pub(crate) public_url: Option, pub(crate) company: Option, pub(crate) company_website: Option, @@ -54,7 +52,6 @@ impl NodeIdentity { pub fn new( public_key: BLSPubKey, name: Option, - wallet_address: Option, public_url: Option, company: Option, company_website: Option, @@ -66,7 +63,6 @@ impl NodeIdentity { Self { public_key, name, - wallet_address, public_url, company, company_website, @@ -85,10 +81,6 @@ impl NodeIdentity { &self.name } - pub fn wallet_address(&self) -> &Option { - &self.wallet_address - } - pub fn public_url(&self) -> &Option { &self.public_url } @@ -121,7 +113,6 @@ impl NodeIdentity { Self { public_key, name: None, - wallet_address: None, public_url: None, company: None, company_website: None, @@ -146,7 +137,6 @@ pub mod tests { NodeIdentity::new( pub_key, Some("a".to_string()), - Some(Default::default()), Some("https://espressosys.com/".parse().unwrap()), Some("company".to_string()), Some("https://example.com/".parse().unwrap()), @@ -210,14 +200,6 @@ pub mod tests { assert_eq!(name, &Some("a".to_string())); } - #[test] - fn test_node_identity_wallet_address() { - let node_identity = create_test_node(1); - let wallet_address = node_identity.wallet_address(); - - assert_eq!(wallet_address, &Some(Default::default())); - } - #[test] fn test_node_identity_public_url() { let node_identity = create_test_node(1); diff --git a/sequencer/src/lib.rs b/sequencer/src/lib.rs index d5fdfc331..95662a9b6 100644 --- a/sequencer/src/lib.rs +++ b/sequencer/src/lib.rs @@ -152,7 +152,6 @@ pub async fn init_node( "node_identity_general".into(), vec![ "name".into(), - "wallet".into(), "company_name".into(), "company_website".into(), "operating_system".into(), @@ -162,7 +161,6 @@ pub async fn init_node( ) .create(vec![ identity.node_name.unwrap_or("".into()), - identity.wallet_address.unwrap_or("".into()), identity.company_name.unwrap_or("".into()), identity .company_website diff --git a/sequencer/src/options.rs b/sequencer/src/options.rs index e6a34c123..5e367c201 100644 --- a/sequencer/src/options.rs +++ b/sequencer/src/options.rs @@ -260,8 +260,6 @@ pub struct Identity { #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_NODE_NAME")] pub node_name: Option, - #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_WALLET_ADDRESS")] - pub wallet_address: Option, #[clap(long, env = 
"ESPRESSO_SEQUENCER_IDENTITY_COMPANY_NAME")] pub company_name: Option, #[clap(long, env = "ESPRESSO_SEQUENCER_IDENTITY_COMPANY_WEBSITE")] From 07c472236227d7d8ed73730d022ba675fafb9e9e Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Fri, 2 Aug 2024 10:00:53 -0600 Subject: [PATCH 70/72] Add /v0/ to node_validator in process-compose.yaml Add /v0/ to node-validator in docker-compose.yaml Fix `depends_on` conditions in docker-compose.yaml --- docker-compose.yaml | 14 +++++++------- process-compose.yaml | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index f9c0395b3..aff22c3bb 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -598,20 +598,20 @@ services: ports: - "$ESPRESSO_NODE_VALIDATOR_PORT:$ESPRESSO_NODE_VALIDATOR_PORT" environment: - - ESPRESSO_NODE_VALIDATOR_STAKE_TABLE_SOURCE_BASE_URL=http://sequencer0:$ESPRESSO_SEQUENCER_API_PORT - - ESPRESSO_NODE_VALIDATOR_LEAF_STREAM_SOURCE_BASE_URL=http://sequencer0:$ESPRESSO_SEQUENCER_API_PORT + - ESPRESSO_NODE_VALIDATOR_STAKE_TABLE_SOURCE_BASE_URL=http://sequencer0:$ESPRESSO_SEQUENCER_API_PORT/v0/ + - ESPRESSO_NODE_VALIDATOR_LEAF_STREAM_SOURCE_BASE_URL=http://sequencer0:$ESPRESSO_SEQUENCER_API_PORT/v0/ - ESPRESSO_NODE_VALIDATOR_INITIAL_NODE_PUBLIC_BASE_URLS=http://sequencer0:$ESPRESSO_SEQUENCER_API_PORT,http://sequencer1:$ESPRESSO_SEQUENCER1_API_PORT,http://sequencer2:$ESPRESSO_SEQUENCER2_API_PORT,http://sequencer3:$ESPRESSO_SEQUENCER3_API_PORT,http://sequencer4:$ESPRESSO_SEQUENCER4_API_PORT depends_on: sequencer0: - condition: process_healthy + condition: service_healthy sequencer1: - condition: process_healthy + condition: service_healthy sequencer2: - condition: process_healthy + condition: service_healthy sequencer3: - condition: process_healthy + condition: service_healthy sequencer4: - condition: process_healthy + condition: service_healthy sequencer-db-0: image: postgres diff --git a/process-compose.yaml b/process-compose.yaml index 2aa28e909..0dc060042 100644 --- a/process-compose.yaml +++ b/process-compose.yaml @@ -336,8 +336,8 @@ processes: node_validator: command: node-metrics -- environment: - - ESPRESSO_NODE_VALIDATOR_STAKE_TABLE_SOURCE_BASE_URL=http://localhost:$ESPRESSO_SEQUENCER_API_PORT - - ESPRESSO_NODE_VALIDATOR_LEAF_STREAM_SOURCE_BASE_URL=http://localhost:$ESPRESSO_SEQUENCER_API_PORT + - ESPRESSO_NODE_VALIDATOR_STAKE_TABLE_SOURCE_BASE_URL=http://localhost:$ESPRESSO_SEQUENCER_API_PORT/v0/ + - ESPRESSO_NODE_VALIDATOR_LEAF_STREAM_SOURCE_BASE_URL=http://localhost:$ESPRESSO_SEQUENCER_API_PORT/v0/ - ESPRESSO_NODE_VALIDATOR_INITIAL_NODE_PUBLIC_BASE_URLS=http://localhost:$ESPRESSO_SEQUENCER_API_PORT,http://localhost:$ESPRESSO_SEQUENCER1_API_PORT,http://localhost:$ESPRESSO_SEQUENCER2_API_PORT,http://localhost:$ESPRESSO_SEQUENCER3_API_PORT,http://localhost:$ESPRESSO_SEQUENCER4_API_PORT depends_on: broker_0: From 6bbcb6e9ce95d8d443c276a3134e46a63e126f3f Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Fri, 2 Aug 2024 12:21:38 -0600 Subject: [PATCH 71/72] Remove task joins from tests The task joins were causing the tests to fail due to the closed channels resulting in a panic. Since this is desired behavior, the tests for them have been removed from the tests so as not to cause failures due to desired panic behavior. 
--- node-metrics/src/service/client_state/mod.rs | 272 ++----------------- 1 file changed, 16 insertions(+), 256 deletions(-) diff --git a/node-metrics/src/service/client_state/mod.rs b/node-metrics/src/service/client_state/mod.rs index 99c7e33ca..4a47b063a 100644 --- a/node-metrics/src/service/client_state/mod.rs +++ b/node-metrics/src/service/client_state/mod.rs @@ -1286,30 +1286,12 @@ pub mod tests { let client_thread_state = Arc::new(RwLock::new(create_test_client_thread_state())); let data_state = Arc::new(RwLock::new(data_state)); - let (mut internal_client_message_sender, internal_client_message_receiver) = - mpsc::channel(1); - let mut process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + let (_internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); + let _process_internal_client_message_handle = InternalClientMessageProcessingTask::new( internal_client_message_receiver, data_state, client_thread_state, ); - - // disconnect the last internal client message sender - internal_client_message_sender.disconnect(); - - // Join the async task. - if let Err(timeout_error) = process_internal_client_message_handle - .task_handle - .take() - .unwrap() - .timeout(Duration::from_millis(200)) - .await - { - panic!( - "process_internal_client_message_handle did not complete in time, error: {}", - timeout_error - ); - } } #[async_std::test] @@ -1326,7 +1308,7 @@ pub mod tests { let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); - let mut process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + let _process_internal_client_message_handle = InternalClientMessageProcessingTask::new( internal_client_message_receiver, data_state, client_thread_state, @@ -1377,28 +1359,6 @@ pub mod tests { voters_1, voters_2 ]))), ); - - // disconnect the internal client message sender - internal_client_message_sender_1.disconnect(); - internal_client_message_sender_2.disconnect(); - - // The server message receiver should be shutdown, and should return - // nothing further - assert_eq!(server_message_receiver_1.next().await, None); - assert_eq!(server_message_receiver_2.next().await, None); - - if let Err(timeout_error) = process_internal_client_message_handle - .task_handle - .take() - .unwrap() - .timeout(Duration::from_millis(200)) - .await - { - panic!( - "process_internal_client_message_handle did not complete in time, error: {}", - timeout_error - ); - } } #[async_std::test] @@ -1418,7 +1378,7 @@ pub mod tests { let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); - let mut process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + let _process_internal_client_message_handle = InternalClientMessageProcessingTask::new( internal_client_message_receiver, data_state, client_thread_state, @@ -1467,29 +1427,6 @@ pub mod tests { server_message_receiver_1.next().await, Some(ServerMessage::BlocksSnapshot(Arc::new(vec![block_1]))), ); - - // disconnect the internal client message sender - internal_client_message_sender_1.disconnect(); - internal_client_message_sender_2.disconnect(); - - // The server message 
receiver should be shutdown, and should return - // nothing further - assert_eq!(server_message_receiver_1.next().await, None); - assert_eq!(server_message_receiver_2.next().await, None); - - // Join the async task. - if let Err(timeout_error) = process_internal_client_message_handle - .task_handle - .take() - .unwrap() - .timeout(Duration::from_millis(200)) - .await - { - panic!( - "process_internal_client_message_handle did not complete in time, error: {}", - timeout_error - ); - } } #[async_std::test] @@ -1501,7 +1438,7 @@ pub mod tests { let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); - let mut process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + let _process_internal_client_message_handle = InternalClientMessageProcessingTask::new( internal_client_message_receiver, data_state, client_thread_state, @@ -1555,29 +1492,6 @@ pub mod tests { node_3.clone() ]))), ); - - // disconnect the last internal client message sender - internal_client_message_sender_1.disconnect(); - internal_client_message_sender_2.disconnect(); - - // The server message receiver should be shutdown, and should return - // nothing further - assert_eq!(server_message_receiver_1.next().await, None); - assert_eq!(server_message_receiver_2.next().await, None); - - // Join the async task. - if let Err(timeout_error) = process_internal_client_message_handle - .task_handle - .take() - .unwrap() - .timeout(Duration::from_millis(200)) - .await - { - panic!( - "process_internal_client_message_handle did not complete in time, error: {}", - timeout_error - ); - } } #[async_std::test] @@ -1593,22 +1507,21 @@ pub mod tests { let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); let (server_message_sender_3, mut server_message_receiver_3) = mpsc::channel(1); - let mut process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + let _process_internal_client_message_handle = InternalClientMessageProcessingTask::new( internal_client_message_receiver, data_state.clone(), client_thread_state.clone(), ); - let mut process_distribute_block_detail_handle = - ProcessDistributeBlockDetailHandlingTask::new( - client_thread_state.clone(), - block_detail_receiver, - ); + let _process_distribute_block_detail_handle = ProcessDistributeBlockDetailHandlingTask::new( + client_thread_state.clone(), + block_detail_receiver, + ); - let mut process_distribute_voters_handle = + let _process_distribute_voters_handle = ProcessDistributeVotersHandlingTask::new(client_thread_state, voters_receiver); - let mut process_leaf_stream_handle = ProcessLeafStreamTask::new( + let _process_leaf_stream_handle = ProcessLeafStreamTask::new( leaf_receiver, data_state, block_detail_sender, @@ -1698,75 +1611,6 @@ pub mod tests { server_message_receiver_2.next().await, Some(ServerMessage::LatestBlock(arc_expected_block.clone())) ); - - // disconnect the leaf sender - leaf_sender.disconnect(); - - // Join the async task. - if let Err(timeout_error) = process_leaf_stream_handle - .task_handle - .take() - .unwrap() - .timeout(Duration::from_millis(200)) - .await - { - panic!( - "process_leaf_stream_handle did not complete in time, error: {}", - timeout_error - ); - } - - // Join the async task. 
- if let Err(timeout_error) = process_distribute_block_detail_handle - .task_handle - .take() - .unwrap() - .timeout(Duration::from_millis(200)) - .await - { - panic!( - "process_distribute_client_handling_handle did not complete in time, error: {}", - timeout_error - ); - } - - if let Err(timeout_error) = process_distribute_voters_handle - .task_handle - .take() - .unwrap() - .timeout(Duration::from_millis(200)) - .await - { - panic!( - "process_distribute_voters_handle did not complete in time, error: {}", - timeout_error - ); - } - - // disconnect the last internal client message sender - internal_client_message_sender_1.disconnect(); - internal_client_message_sender_2.disconnect(); - internal_client_message_sender_3.disconnect(); - - // The server message receiver should be shutdown, and should return - // nothing further - assert_eq!(server_message_receiver_1.next().await, None); - assert_eq!(server_message_receiver_2.next().await, None); - assert_eq!(server_message_receiver_3.next().await, None); - - // Join the async task. - if let Err(timeout_error) = process_internal_client_message_handle - .task_handle - .take() - .unwrap() - .timeout(Duration::from_millis(200)) - .await - { - panic!( - "process_internal_client_message_handle did not complete in time, error: {}", - timeout_error - ); - } } #[async_std::test] @@ -1780,13 +1624,13 @@ pub mod tests { let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); let (server_message_sender_3, mut server_message_receiver_3) = mpsc::channel(1); - let mut process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + let _process_internal_client_message_handle = InternalClientMessageProcessingTask::new( internal_client_message_receiver, data_state.clone(), client_thread_state.clone(), ); - let mut process_distribute_node_identity_handle = + let _process_distribute_node_identity_handle = ProcessDistributeNodeIdentityHandlingTask::new( client_thread_state, node_identity_receiver, @@ -1877,48 +1721,6 @@ pub mod tests { server_message_receiver_2.next().await, Some(ServerMessage::LatestNodeIdentity(arc_node_identity.clone())) ); - - // disconnect the leaf sender - node_identity_sender.disconnect(); - - // Join the async task. - if let Err(timeout_error) = process_distribute_node_identity_handle - .task_handle - .take() - .unwrap() - .timeout(Duration::from_millis(200)) - .await - { - panic!( - "process_distribute_node_identity_handle did not complete in time, error: {}", - timeout_error - ); - } - - // disconnect the last internal client message sender - internal_client_message_sender_1.disconnect(); - internal_client_message_sender_2.disconnect(); - internal_client_message_sender_3.disconnect(); - - // The server message receiver should be shutdown, and should return - // nothing further - assert_eq!(server_message_receiver_1.next().await, None); - assert_eq!(server_message_receiver_2.next().await, None); - assert_eq!(server_message_receiver_3.next().await, None); - - // Join the async task. 
- if let Err(timeout_error) = process_internal_client_message_handle - .task_handle - .take() - .unwrap() - .timeout(Duration::from_millis(200)) - .await - { - panic!( - "process_internal_client_message_handle did not complete in time, error: {}", - timeout_error - ); - } } #[async_std::test] @@ -1932,13 +1734,13 @@ pub mod tests { let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); let (server_message_sender_3, mut server_message_receiver_3) = mpsc::channel(1); - let mut process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + let _process_internal_client_message_handle = InternalClientMessageProcessingTask::new( internal_client_message_receiver, data_state.clone(), client_thread_state.clone(), ); - let mut process_distribute_voters_handle = + let _process_distribute_voters_handle = ProcessDistributeVotersHandlingTask::new(client_thread_state, voters_receiver); // Send a Connected Message to the server @@ -2021,48 +1823,6 @@ pub mod tests { server_message_receiver_2.next().await, Some(ServerMessage::LatestVoters(voters.clone())) ); - - // disconnect the leaf sender - voters_sender.disconnect(); - - // Join the async task. - if let Err(timeout_error) = process_distribute_voters_handle - .task_handle - .take() - .unwrap() - .timeout(Duration::from_millis(200)) - .await - { - panic!( - "process_distribute_voters_handle did not complete in time, error: {}", - timeout_error - ); - } - - // disconnect the last internal client message sender - internal_client_message_sender_1.disconnect(); - internal_client_message_sender_2.disconnect(); - internal_client_message_sender_3.disconnect(); - - // The server message receiver should be shutdown, and should return - // nothing further - assert_eq!(server_message_receiver_1.next().await, None); - assert_eq!(server_message_receiver_2.next().await, None); - assert_eq!(server_message_receiver_3.next().await, None); - - // Join the async task. 
- if let Err(timeout_error) = process_internal_client_message_handle - .task_handle - .take() - .unwrap() - .timeout(Duration::from_millis(200)) - .await - { - panic!( - "process_internal_client_message_handle did not complete in time, error: {}", - timeout_error - ); - } } // The following tests codify assumptions being bad on behalf of the Sink From 355093c3e20e558422fa9d27937ffb62d36ff2d6 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Mon, 5 Aug 2024 10:51:51 -0600 Subject: [PATCH 72/72] Add cancels to async tasks in tests to avoid panics --- node-metrics/src/service/client_state/mod.rs | 92 +++++++++++++++++--- node-metrics/src/service/data_state/mod.rs | 14 ++- 2 files changed, 83 insertions(+), 23 deletions(-) diff --git a/node-metrics/src/service/client_state/mod.rs b/node-metrics/src/service/client_state/mod.rs index 4a47b063a..53092c13a 100644 --- a/node-metrics/src/service/client_state/mod.rs +++ b/node-metrics/src/service/client_state/mod.rs @@ -1378,7 +1378,7 @@ pub mod tests { let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); - let _process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + let mut process_internal_client_message_handle = InternalClientMessageProcessingTask::new( internal_client_message_receiver, data_state, client_thread_state, @@ -1427,6 +1427,12 @@ pub mod tests { server_message_receiver_1.next().await, Some(ServerMessage::BlocksSnapshot(Arc::new(vec![block_1]))), ); + + if let Some(process_internal_client_message_handle) = + process_internal_client_message_handle.task_handle.take() + { + assert_eq!(process_internal_client_message_handle.cancel().await, None); + } } #[async_std::test] @@ -1438,7 +1444,7 @@ pub mod tests { let (internal_client_message_sender, internal_client_message_receiver) = mpsc::channel(1); let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); - let _process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + let mut process_internal_client_message_handle = InternalClientMessageProcessingTask::new( internal_client_message_receiver, data_state, client_thread_state, @@ -1492,6 +1498,12 @@ pub mod tests { node_3.clone() ]))), ); + + if let Some(process_internal_client_message_handle) = + process_internal_client_message_handle.task_handle.take() + { + assert_eq!(process_internal_client_message_handle.cancel().await, None); + } } #[async_std::test] @@ -1507,21 +1519,22 @@ pub mod tests { let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); let (server_message_sender_3, mut server_message_receiver_3) = mpsc::channel(1); - let _process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + let mut process_internal_client_message_handle = InternalClientMessageProcessingTask::new( internal_client_message_receiver, data_state.clone(), client_thread_state.clone(), ); - let _process_distribute_block_detail_handle = ProcessDistributeBlockDetailHandlingTask::new( - client_thread_state.clone(), - block_detail_receiver, - ); + let mut process_distribute_block_detail_handle = + ProcessDistributeBlockDetailHandlingTask::new( + client_thread_state.clone(), + 
block_detail_receiver, + ); - let _process_distribute_voters_handle = + let mut process_distribute_voters_handle = ProcessDistributeVotersHandlingTask::new(client_thread_state, voters_receiver); - let _process_leaf_stream_handle = ProcessLeafStreamTask::new( + let mut process_leaf_stream_handle = ProcessLeafStreamTask::new( leaf_receiver, data_state, block_detail_sender, @@ -1611,6 +1624,34 @@ pub mod tests { server_message_receiver_2.next().await, Some(ServerMessage::LatestBlock(arc_expected_block.clone())) ); + + if server_message_receiver_3 + .next() + .timeout(Duration::from_millis(10)) + .await + .is_ok() + { + panic!("receiver 3 should not have received the latest block."); + } + + if let Some(process_internal_client_message_handle) = + process_internal_client_message_handle.task_handle.take() + { + assert_eq!(process_internal_client_message_handle.cancel().await, None); + } + if let Some(process_distribute_block_detail_handle) = + process_distribute_block_detail_handle.task_handle.take() + { + assert_eq!(process_distribute_block_detail_handle.cancel().await, None); + } + if let Some(process_distribute_voters_handle) = + process_distribute_voters_handle.task_handle.take() + { + assert_eq!(process_distribute_voters_handle.cancel().await, None); + } + if let Some(process_leaf_stream_handle) = process_leaf_stream_handle.task_handle.take() { + assert_eq!(process_leaf_stream_handle.cancel().await, None); + } } #[async_std::test] @@ -1624,13 +1665,13 @@ pub mod tests { let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); let (server_message_sender_3, mut server_message_receiver_3) = mpsc::channel(1); - let _process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + let mut process_internal_client_message_handle = InternalClientMessageProcessingTask::new( internal_client_message_receiver, data_state.clone(), client_thread_state.clone(), ); - let _process_distribute_node_identity_handle = + let mut process_distribute_node_identity_handle = ProcessDistributeNodeIdentityHandlingTask::new( client_thread_state, node_identity_receiver, @@ -1668,7 +1709,7 @@ pub mod tests { ); // Send another Connected Message to the server - let mut internal_client_message_sender_3 = internal_client_message_sender; + let mut internal_client_message_sender_3 = internal_client_message_sender.clone(); assert_eq!( internal_client_message_sender_3 .send(InternalClientMessage::Connected(server_message_sender_3)) @@ -1721,6 +1762,18 @@ pub mod tests { server_message_receiver_2.next().await, Some(ServerMessage::LatestNodeIdentity(arc_node_identity.clone())) ); + + if let Some(process_internal_client_message_handle) = + process_internal_client_message_handle.task_handle.take() + { + assert_eq!(process_internal_client_message_handle.cancel().await, None); + } + + if let Some(process_distribute_node_identity_handle) = + process_distribute_node_identity_handle.task_handle.take() + { + assert_eq!(process_distribute_node_identity_handle.cancel().await, None); + } } #[async_std::test] @@ -1734,13 +1787,13 @@ pub mod tests { let (server_message_sender_1, mut server_message_receiver_1) = mpsc::channel(1); let (server_message_sender_2, mut server_message_receiver_2) = mpsc::channel(1); let (server_message_sender_3, mut server_message_receiver_3) = mpsc::channel(1); - let _process_internal_client_message_handle = InternalClientMessageProcessingTask::new( + let mut 
process_internal_client_message_handle = InternalClientMessageProcessingTask::new( internal_client_message_receiver, data_state.clone(), client_thread_state.clone(), ); - let _process_distribute_voters_handle = + let mut process_distribute_voters_handle = ProcessDistributeVotersHandlingTask::new(client_thread_state, voters_receiver); // Send a Connected Message to the server @@ -1823,6 +1876,17 @@ pub mod tests { server_message_receiver_2.next().await, Some(ServerMessage::LatestVoters(voters.clone())) ); + + if let Some(process_internal_client_message_handle) = + process_internal_client_message_handle.task_handle.take() + { + assert_eq!(process_internal_client_message_handle.cancel().await, None); + } + if let Some(process_distribute_voters_handle) = + process_distribute_voters_handle.task_handle.take() + { + assert_eq!(process_distribute_voters_handle.cancel().await, None); + } } // The following tests codify assumptions being bad on behalf of the Sink diff --git a/node-metrics/src/service/data_state/mod.rs b/node-metrics/src/service/data_state/mod.rs index 6c7b289f8..3a4dea838 100644 --- a/node-metrics/src/service/data_state/mod.rs +++ b/node-metrics/src/service/data_state/mod.rs @@ -777,14 +777,10 @@ mod tests { // We explicitly drop these, as it should make the task clean up. drop(node_identity_sender_1); - assert_eq!( - process_node_identity_task_handle - .task_handle - .take() - .unwrap() - .timeout(Duration::from_millis(200)) - .await, - Ok(()) - ); + if let Some(process_node_identity_task_handle) = + process_node_identity_task_handle.task_handle.take() + { + assert_eq!(process_node_identity_task_handle.cancel().await, None); + } } }
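Taken together with the previous commit, the shutdown pattern is: keep the JoinHandle and cancel() it at the end of the test instead of awaiting it. With async-std, JoinHandle::cancel() consumes the handle and resolves to Some(output) only if the task had already finished, so cancelling a still-running worker yields None, which is what these assertions check. A minimal standalone sketch (assumes async-std with the "attributes" feature enabled; the worker is hypothetical):

    use async_std::task;
    use std::time::Duration;

    #[async_std::main]
    async fn main() {
        // A worker that is still running when we cancel it.
        let handle = task::spawn(async {
            task::sleep(Duration::from_secs(60)).await;
        });

        // cancel() stops the task; since it had not completed, it reports
        // no output.
        assert_eq!(handle.cancel().await, None);
    }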